---
title: "Fresh Thesis"
author: "Dian Yu"
date: "2026-01-24"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
setwd("~/Desktop/Thesis/NAAS2016/DS0001")
rm(list=ls())
cat('\014')

library(tidyverse)
library(haven)
library(kableExtra)

NAAS2016 <- load("NAAS2016.rda")
```

```{r}
NAAS2016 <- da37380.0001 %>%
  mutate(across(everything(), haven::as_factor))

naas_summary <- NAAS2016 %>%
  mutate(
    # Ethnicity
    ethnicity = case_when(
      RETHNICS == "(01) a Bangladeshi" ~ "Bangladeshi",
      RETHNICS == "(02) a Cambodian" ~ "Cambodian",
      RETHNICS == "(03) a Chinese" ~ "Chinese",
      RETHNICS == "(04) a Filipino" ~ "Filipino",
      RETHNICS == "(05) a Hmong" ~ "Hmong",
      RETHNICS == "(06) an Indian" ~ "Indian",
      RETHNICS == "(07) a Japanese" ~ "Japanese",
      RETHNICS == "(08) a Korean" ~ "Korean",
      RETHNICS == "(09) a Laotian" ~ "Laotian",
      RETHNICS == "(10) a Pakistani" ~ "Pakistani",
      RETHNICS == "(11) a Vietnamese" ~ "Vietnamese",
      RETHNICS == "(12) a Native Hawaiian" ~ "Native Hawaiian",
      RETHNICS == "(13) a Samoan" ~ "Samoan",
      TRUE ~ "Other/Latino"
    ),
    
    # Nativity & Generation
    nativity = if_else(S9 == "(1) United States", "US-born", "Foreign-born"),
    mother_fb = Q1_1 == "(2) No",
    father_fb = Q1_2 == "(2) No",
    parents_nativity = case_when(
      mother_fb & father_fb ~ "Both parents foreign-born",
      !mother_fb & !father_fb ~ "Both parents US-born",
      TRUE ~ "One parent US-born"
    ),
    
    # Generation: 2nd gen = US-born with BOTH parents foreign-born
    generation = case_when(
      nativity == "Foreign-born" ~ "1st generation",
      nativity == "US-born" & parents_nativity == "Both parents foreign-born" ~ "2nd generation",
      nativity == "US-born" & parents_nativity == "Both parents US-born" ~ "3rd+ generation",
      TRUE ~ NA_character_  # US-born with one parent US-born = excluded
    ),
    
    # Demographics
    age_35plus = AGE2 == "(2) 35 or above",
    age_group = if_else(age_35plus, "35 and over", "Under 35"),
    male = S7 == "(1) Male",
    gender = if_else(S7 == "(1) Male", "Male", "Female"),
    
    # Education
    education = case_when(
      S8 %in% c("(1) No schooling completed", "(2) Some schooling, no high school degree") ~ "Less than HS",
      S8 == "(3) High school degree/GED" ~ "High school",
      S8 == "(4) Some college, but no degree" ~ "Some college",
      S8 == "(5) College degree or Bachelor's degree" ~ "Bachelor's",
      S8 == "(6) Graduate or Professional degree" ~ "Graduate",
      TRUE ~ NA_character_
    ),
    college_plus = education %in% c("Bachelor's", "Graduate"),
    
    # Income
    income = case_when(
      Q10_15 == "(1) Up to $20,000" ~ "Under $20k",
      Q10_15 == "(2) $20,000 to $50,000" ~ "$20k-$50k",
      Q10_15 == "(3) $50,000 to $75,000" ~ "$50k-$75k",
      Q10_15 == "(4) $75,000 to $100,000" ~ "$75k-$100k",
      Q10_15 == "(5) $100,000 to $125,000" ~ "$100k-$125k",
      Q10_15 == "(6) $125,000 to $250,000" ~ "$125k-$250k",
      Q10_15 == "(7) $250,000 and over" ~ "$250k+",
      TRUE ~ NA_character_
    ),
    income_100k_plus = income %in% c("$100k-$125k", "$125k-$250k", "$250k+"),
    
    # Party ID
    party = case_when(
      Q10_0A == "(1) Democrat" ~ "Democrat",
      Q10_0A == "(2) Republican" ~ "Republican",
      Q10_0A == "(3) Independent" ~ "Independent",
      Q10_0A == "(4) In terms of some other party" ~ "Other party",
      Q10_0A == "(5) Do not think in terms of political parties" ~ "No party affiliation",
      TRUE ~ NA_character_
    ),
    neither_dem_rep = party %in% c("Independent", "Other party", "No party affiliation"),
    
    # Religion
    religion = case_when(
      Q10_21 %in% c("(03) Baptist", "(05) Catholic", "(06) Christian",
                    "(12) Episcopalian, Anglican", "(19) Lutheran", "(20) Methodist",
                    "(21) Mormon, Church of the Latter Day Saints", "(24) Pentecostal",
                    "(25) Presbyterian", "(26) Protestant (no denomination given)",
                    "(28) Seventh Day Adventist") ~ "Christian",
      Q10_21 == "(04) Buddhist" ~ "Buddhist",
      Q10_21 == "(15) Hindu" ~ "Hindu",
      Q10_21 == "(22) Muslim, Mohammedan, Islam" ~ "Muslim",
      Q10_21 == "(29) Sikh" ~ "Sikh",
      Q10_21 %in% c("(01) Agnostic", "(02) Atheist", "(77) No religion",
                    "(78) Spiritual, but not religious") ~ "Non-religious",
      TRUE ~ "Other religion"
    ),
    
    # Citizenship & Registration
    citizen = case_when(
      S9 == "(1) United States" ~ "US citizen (born)",
      Q1_4 == "(1) U.S. Citizen" ~ "US citizen (naturalized)",
      TRUE ~ "Non-citizen"
    ),
    registered = Q2_3 == "(1) Yes, registered"
  ) %>%
  mutate(
    education = factor(education, levels = c("Less than HS", "High school", "Some college", "Bachelor's", "Graduate")),
    income = factor(income, levels = c("Under $20k", "$20k-$50k", "$50k-$75k", "$75k-$100k", 
                                       "$100k-$125k", "$125k-$250k", "$250k+")),
    generation = factor(generation, levels = c("1st generation", "2nd generation", "3rd+ generation")),
    party = factor(party, levels = c("Democrat", "Republican", "Independent", "Other party", "No party affiliation"))
  )


calc_stats <- function(var, label) {
  tibble(
    Characteristic = label,
    N = sum(var, na.rm = TRUE),
    Percent = round(100 * mean(var, na.rm = TRUE), 1)
  )
}

make_category_block <- function(data, var, label) {
  data %>%
    filter(!is.na({{ var }})) %>%
    count({{ var }}) %>%
    mutate(
      Characteristic = label,
      Category = as.character({{ var }}),
      N = n,
      Percent = round(100 * n / sum(n), 1)
    ) %>%
    select(Characteristic, Category, N, Percent)
}

# Helper function for party crosstabs
make_party_crosstab <- function(data, groupvar, group_label) {
  data %>%
    filter(!is.na(party), !is.na({{ groupvar }})) %>%
    count({{ groupvar }}, party) %>%
    group_by({{ groupvar }}) %>%
    mutate(
      total = sum(n),
      percent = round(100 * n / sum(n), 1)
    ) %>%
    ungroup() %>%
    select({{ groupvar }}, party, n, percent) %>%
    pivot_wider(
      names_from = party,
      values_from = c(n, percent),
      values_fill = 0,
      names_glue = "{party}_{.value}"
    ) %>%
    mutate(
      Group_Type = group_label,
      Total_N = rowSums(across(ends_with("_n")))
    ) %>%
    select(Group_Type, Group = {{ groupvar }}, Total_N, ends_with("_percent")) %>%
    rename_with(~str_remove(., "_percent"), ends_with("_percent"))
}

# TABLE 1: BASIC SUMMARY (removed "Both parents foreign-born" row)
basic_summary <- bind_rows(
  tibble(Characteristic = "Total Sample", N = nrow(naas_summary), Percent = 100.0),
  calc_stats(naas_summary$nativity == "Foreign-born", "Foreign-born (1st gen)"),
  calc_stats(naas_summary$generation == "2nd generation", "2nd generation (US-born, both parents foreign-born)"),
  calc_stats(naas_summary$male, "Male"),
  calc_stats(naas_summary$age_35plus, "Age 35+"),
  calc_stats(naas_summary$college_plus, "Bachelor's degree or higher"),
  calc_stats(naas_summary$income_100k_plus, "Household income $100k+"),
  calc_stats(naas_summary$party == "Democrat", "Democrat"),
  calc_stats(naas_summary$party == "Republican", "Republican"),
  calc_stats(naas_summary$neither_dem_rep, "Neither Democrat nor Republican"),
  calc_stats(naas_summary$citizen %in% c("US citizen (born)", "US citizen (naturalized)"), "US Citizen"),
  calc_stats(naas_summary$registered, "Registered to vote")
)

kable(basic_summary,
      caption = "Table 1: NAAS 2016 Basic Sample Characteristics",
      format = "html",
      digits = 1,
      col.names = c("Characteristic", "N", "%")) %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover")) %>%
  row_spec(0, bold = TRUE) %>%
  row_spec(1, bold = TRUE, background = "#f0f0f0") %>%
  add_footnote(
    label = "2nd generation defined as US-born with both parents foreign-born",
    notation = "symbol"
  )

# TABLE 2: DETAILED BREAKDOWN
detailed_summary <- bind_rows(
  tibble(Characteristic = "Total Sample", Category = "N", N = nrow(naas_summary), Percent = NA_real_),
  make_category_block(naas_summary, ethnicity, "Ethnicity"),
  make_category_block(naas_summary, nativity, "Nativity"),
  make_category_block(naas_summary, generation, "Generation"),
  make_category_block(naas_summary, parents_nativity, "Parents' Nativity"),
  tibble(Characteristic = "Age", Category = "Age 35+", 
         N = sum(naas_summary$age_35plus, na.rm = TRUE),
         Percent = round(100 * mean(naas_summary$age_35plus, na.rm = TRUE), 1)),
  tibble(Characteristic = "Age", Category = "Under 35", 
         N = sum(!naas_summary$age_35plus, na.rm = TRUE),
         Percent = round(100 * mean(!naas_summary$age_35plus, na.rm = TRUE), 1)),
  tibble(Characteristic = "Gender", Category = "Male", 
         N = sum(naas_summary$male, na.rm = TRUE),
         Percent = round(100 * mean(naas_summary$male, na.rm = TRUE), 1)),
  tibble(Characteristic = "Gender", Category = "Female", 
         N = sum(!naas_summary$male, na.rm = TRUE),
         Percent = round(100 * mean(!naas_summary$male, na.rm = TRUE), 1)),
  make_category_block(naas_summary, education, "Education"),
  make_category_block(naas_summary, income, "Household Income"),
  make_category_block(naas_summary, party, "Party Identification"),
  make_category_block(naas_summary, religion, "Religion"),
  make_category_block(naas_summary, citizen, "Citizenship"),
  tibble(Characteristic = "Voter Registration", Category = "Registered", 
         N = sum(naas_summary$registered, na.rm = TRUE),
         Percent = round(100 * mean(naas_summary$registered, na.rm = TRUE), 1)),
  tibble(Characteristic = "Voter Registration", Category = "Not registered", 
         N = sum(!naas_summary$registered, na.rm = TRUE),
         Percent = round(100 * mean(!naas_summary$registered, na.rm = TRUE), 1))
)

kable(detailed_summary,
      caption = "Table 2: NAAS 2016 Detailed Sample Characteristics",
      format = "html",
      digits = 1,
      col.names = c("Characteristic", "Category", "N", "%")) %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover")) %>%
  row_spec(0, bold = TRUE) %>%
  row_spec(1, bold = TRUE, background = "#f0f0f0") %>%
  collapse_rows(columns = 1, valign = "top") %>%
  add_footnote(
    label = "2nd generation = US-born with both parents foreign-born; 3rd+ generation = US-born with both parents US-born",
    notation = "symbol"
  )

library(kableExtra)
library(webshot2)

# Create the clean academic table
academic_table <- detailed_summary %>%
  kable(
    format = "html",
    caption = NULL,  # We'll add title separately for more control
    digits = 1,
    col.names = c("Characteristic", "Category", "N", "%"),
    align = c("l", "l", "r", "r")
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "condensed"),
    full_width = FALSE,
    position = "center",
    font_size = 12
  ) %>%
  row_spec(0, bold = TRUE, font_size = 13, background = "#f8f9fa", 
           extra_css = "border-top: 2px solid #000; border-bottom: 1px solid #000;") %>%
  collapse_rows(columns = 1, valign = "top") %>%
  footnote(
    general = "2nd generation = US-born with both parents foreign-born; 3rd+ generation = US-born with both parents US-born.",
    general_title = "Notes:",
    footnote_as_chunk = TRUE,
    title_format = c("italic", "underline")
  ) %>%
  gsub("Table:", "<div style='text-align:center; margin-bottom:20px;'><b style='font-size:16px;'>Table 2: NAAS 2016 Detailed Sample Characteristics</b></div>", .)

# Add stargazer-style borders
academic_table <- academic_table %>%
  gsub("<table", "<table style='border-collapse: collapse; border-top: 2px solid #000; border-bottom: 2px solid #000;'", .)

# Print the table
academic_table

# Save as HTML
cat(academic_table, file = "~/Desktop/Thesis/NAAS2016/Fresh Output/Table2_Sample_Characteristics_Academic.html")

# Convert to PNG
webshot("~/Desktop/Thesis/NAAS2016/Fresh Output/Table2_Sample_Characteristics_Academic.html",
        "~/Desktop/Thesis/NAAS2016/Fresh Output/Table2_Sample_Characteristics_Academic.png",
        vwidth = 900, vheight = 1200, zoom = 2)

# TABLE 3: COMBINED PARTISANSHIP BREAKDOWN
party_combined <- bind_rows(
  make_party_crosstab(naas_summary, ethnicity, "Ethnicity"),
  make_party_crosstab(
    naas_summary %>% filter(generation %in% c("1st generation", "2nd generation")),
    generation, "Generation"
  ),
  make_party_crosstab(naas_summary, age_group, "Age"),
  make_party_crosstab(naas_summary, gender, "Gender")
)

kable(party_combined,
      caption = "Table 3: Party Identification by Demographics (row percentages)",
      format = "html",
      digits = 1,
      col.names = c("Category", "Group", "N", "Dem %", "Rep %", "Ind %", "Other %", "No party %")) %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover")) %>%
  row_spec(0, bold = TRUE) %>%
  collapse_rows(columns = 1, valign = "top")

#INDIVIDUAL TABLES

# Table 3a: Party by Ethnicity
party_ethnicity <- naas_summary %>%
  filter(!is.na(party), !is.na(ethnicity)) %>%
  count(ethnicity, party) %>%
  group_by(ethnicity) %>%
  mutate(
    total = sum(n),
    percent = round(100 * n / sum(n), 1)
  ) %>%
  ungroup() %>%
  select(ethnicity, party, n, percent) %>%
  pivot_wider(
    names_from = party,
    values_from = c(n, percent),
    values_fill = 0,
    names_glue = "{party}_{.value}"
  ) %>%
  mutate(Total_N = rowSums(across(ends_with("_n")))) %>%
  select(ethnicity, Total_N, ends_with("_percent")) %>%
  rename_with(~str_remove(., "_percent"), ends_with("_percent")) %>%
  arrange(desc(Total_N))

# Table 3b: Party by Generation
party_generation <- naas_summary %>%
  filter(!is.na(party), generation %in% c("1st generation", "2nd generation")) %>%
  count(generation, party) %>%
  group_by(generation) %>%
  mutate(
    total = sum(n),
    percent = round(100 * n / sum(n), 1)
  ) %>%
  ungroup() %>%
  select(generation, party, n, percent) %>%
  pivot_wider(
    names_from = party,
    values_from = c(n, percent),
    values_fill = 0,
    names_glue = "{party}_{.value}"
  ) %>%
  mutate(Total_N = rowSums(across(ends_with("_n")))) %>%
  select(generation, Total_N, ends_with("_percent")) %>%
  rename_with(~str_remove(., "_percent"), ends_with("_percent"))

# Table 3c: Party by Age
party_age <- naas_summary %>%
  filter(!is.na(party), !is.na(age_group)) %>%
  count(age_group, party) %>%
  group_by(age_group) %>%
  mutate(
    total = sum(n),
    percent = round(100 * n / sum(n), 1)
  ) %>%
  ungroup() %>%
  select(age_group, party, n, percent) %>%
  pivot_wider(
    names_from = party,
    values_from = c(n, percent),
    values_fill = 0,
    names_glue = "{party}_{.value}"
  ) %>%
  mutate(Total_N = rowSums(across(ends_with("_n")))) %>%
  select(age_group, Total_N, ends_with("_percent")) %>%
  rename_with(~str_remove(., "_percent"), ends_with("_percent")) %>%
  arrange(age_group)

# Table 3d: Party by Gender
party_gender <- naas_summary %>%
  filter(!is.na(party), !is.na(gender)) %>%
  count(gender, party) %>%
  group_by(gender) %>%
  mutate(
    total = sum(n),
    percent = round(100 * n / sum(n), 1)
  ) %>%
  ungroup() %>%
  select(gender, party, n, percent) %>%
  pivot_wider(
    names_from = party,
    values_from = c(n, percent),
    values_fill = 0,
    names_glue = "{party}_{.value}"
  ) %>%
  mutate(Total_N = rowSums(across(ends_with("_n")))) %>%
  select(gender, Total_N, ends_with("_percent")) %>%
  rename_with(~str_remove(., "_percent"), ends_with("_percent"))

# ========== CREATE OUTPUT DIRECTORY ==========
dir.create("~/Desktop/Thesis/NAAS2016/Fresh Output", showWarnings = FALSE, recursive = TRUE)

# ========== SAVE ALL TABLES ==========
save_kable(
  kable(basic_summary, format = "html", digits = 1,
        col.names = c("Characteristic", "N", "%")) %>%
    kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover")) %>%
    add_footnote(
      label = "2nd generation defined as US-born with both parents foreign-born",
      notation = "symbol"
    ),
  file = "~/Desktop/Thesis/NAAS2016/Fresh Output/NAAS2016_BasicSummary.html"
)

save_kable(
  kable(detailed_summary, format = "html", digits = 1,
        col.names = c("Characteristic", "Category", "N", "%")) %>%
    kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover")) %>%
    collapse_rows(columns = 1, valign = "top") %>%
    add_footnote(
      label = "2nd generation = US-born with both parents foreign-born; 3rd+ generation = US-born with both parents US-born",
      notation = "symbol"
    ),
  file = "~/Desktop/Thesis/NAAS2016/Fresh Output/NAAS2016_DetailedSummary.html"
)

save_kable(
  kable(party_combined, format = "html", digits = 1,
        col.names = c("Category", "Group", "N", "Dem %", "Rep %", "Ind %", "Other %", "No party %")) %>%
    kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover")) %>%
    collapse_rows(columns = 1, valign = "top"),
  file = "~/Desktop/Thesis/NAAS2016/Fresh Output/NAAS2016_Party_Combined.html"
)

# Save individual tables too
save_kable(
  kable(party_ethnicity, format = "html", digits = 1,
        col.names = c("Ethnicity", "N", "Dem %", "Rep %", "Ind %", "Other %", "No party %")) %>%
    kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover")),
  file = "~/Desktop/Thesis/NAAS2016/Fresh Output/NAAS2016_Party_by_Ethnicity.html"
)

save_kable(
  kable(party_generation, format = "html", digits = 1,
        col.names = c("Generation", "N", "Dem %", "Rep %", "Ind %", "Other %", "No party %")) %>%
    kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover")),
  file = "~/Desktop/Thesis/NAAS2016/Fresh Output/NAAS2016_Party_by_Generation.html"
)

save_kable(
  kable(party_age, format = "html", digits = 1,
        col.names = c("Age Group", "N", "Dem %", "Rep %", "Ind %", "Other %", "No party %")) %>%
    kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover")),
  file = "~/Desktop/Thesis/NAAS2016/Fresh Output/NAAS2016_Party_by_Age.html"
)

save_kable(
  kable(party_gender, format = "html", digits = 1,
        col.names = c("Gender", "N", "Dem %", "Rep %", "Ind %", "Other %", "No party %")) %>%
    kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover")),
  file = "~/Desktop/Thesis/NAAS2016/Fresh Output/NAAS2016_Party_by_Gender.html"
)

cat("\n✓ All summary tables created and saved successfully\n")
cat("  - NAAS2016_BasicSummary.html\n")
cat("  - NAAS2016_DetailedSummary.html\n")
cat("  - NAAS2016_Party_Combined.html\n")
cat("  - NAAS2016_Party_by_Ethnicity.html\n")
cat("  - NAAS2016_Party_by_Generation.html\n")
cat("  - NAAS2016_Party_by_Age.html\n")
cat("  - NAAS2016_Party_by_Gender.html\n")
cat("\nGeneration definitions:\n")
cat("  • 1st generation: Foreign-born\n")
cat("  • 2nd generation: US-born with BOTH parents foreign-born\n")
cat("  • 3rd+ generation: US-born with both parents US-born\n")
cat("  • Those with one US-born parent are excluded from generation analysis\n")
```
Moral Conservatism 1st VS 2nd Gen
```{r}
# ============================================================
# LGBTQ MORAL CONSERVATISM: 1st vs 2nd GENERATION
# ============================================================

library(tidyverse)
library(kableExtra)

# ---- ADD LGBTQ VARIABLES TO EXISTING naas_summary ----
naas_lgbtq <- naas_summary %>%
  mutate(
    # COMBINED: Q8_1A (GLBT) and Q8_1B (GL) - asked to different respondents
    # Use coalesce to take whichever one is non-missing
    lgbtq_gl_combined = case_when(
      coalesce(Q8_1A, Q8_1B) %in% c("(4) Favor", "(5) Strongly favor") ~ "Favor",
      coalesce(Q8_1A, Q8_1B) == "(3) Neither favor nor oppose" ~ "Neither",
      coalesce(Q8_1A, Q8_1B) %in% c("(1) Strongly oppose", "(2) Oppose") ~ "Oppose",
      TRUE ~ NA_character_
    ),
    
    # Q8_1C: Transgender bathroom access
    lgbtq_trans_bathroom = case_when(
      Q8_1C %in% c("(4) Favor", "(5) Strongly favor") ~ "Favor",
      Q8_1C == "(3) Neither favor nor oppose" ~ "Neither",
      Q8_1C %in% c("(1) Strongly oppose", "(2) Oppose") ~ "Oppose",
      TRUE ~ NA_character_
    ),
    
    # Combined LGBTQ support score (0-1 scale)
    lgbtq_support_score = rowMeans(
      cbind(
        # Combined GL/GLBT question
        case_when(coalesce(Q8_1A, Q8_1B) %in% c("(4) Favor", "(5) Strongly favor") ~ 1,
                  coalesce(Q8_1A, Q8_1B) %in% c("(1) Strongly oppose", "(2) Oppose") ~ 0, 
                  TRUE ~ NA_real_),
        # Transgender bathroom
        case_when(Q8_1C %in% c("(4) Favor", "(5) Strongly favor") ~ 1,
                  Q8_1C %in% c("(1) Strongly oppose", "(2) Oppose") ~ 0, 
                  TRUE ~ NA_real_)
      ), na.rm = TRUE
    )
  ) %>%
  filter(generation %in% c("1st generation", "2nd generation"))

# ---- TABLE 1: SIMPLE COMPARISON (% FAVOR) ----
cat("\n", strrep("=", 80), "\n")
cat("LGBTQ SUPPORT: 1ST VS 2ND GENERATION\n")
cat(strrep("=", 80), "\n\n")

favor_summary <- naas_lgbtq %>%
  group_by(generation) %>%
  summarise(
    N = n(),
    `GL/GLBT Protections\n(% Favor)` = round(100 * mean(lgbtq_gl_combined == "Favor", na.rm = TRUE), 1),
    `Trans Bathrooms\n(% Favor)` = round(100 * mean(lgbtq_trans_bathroom == "Favor", na.rm = TRUE), 1),
    `Mean Support\n(0-1 scale)` = round(mean(lgbtq_support_score, na.rm = TRUE), 3),
    .groups = "drop"
  )

favor_html <- favor_summary %>%
  kable(
    format = "html",
    caption = "LGBTQ Support by Generation: Moral Conservatism Comparison",
    digits = 1,
    align = c("l", "c", "c", "c", "c")
  ) %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover")) %>%
  column_spec(1, bold = TRUE) %>%
  add_footnote(
    label = c(
      "GL/GLBT = Combined gay/lesbian and gay/lesbian/transgender protection questions (split sample)",
      "Trans = Transgender bathroom access",
      "Higher % = More progressive/less morally conservative"
    ),
    notation = "symbol"
  )

print(favor_html)

# ---- TABLE 2: BY GENERATION AND AGE ----
cat("\n", strrep("=", 80), "\n")
cat("LGBTQ SUPPORT: BY GENERATION × AGE\n")
cat(strrep("=", 80), "\n\n")

gen_age_summary <- naas_lgbtq %>%
  mutate(group = paste0(generation, ", ", age_group)) %>%
  group_by(generation, age_group, group) %>%
  summarise(
    N = n(),
    `GL/GLBT %` = round(100 * mean(lgbtq_gl_combined == "Favor", na.rm = TRUE), 1),
    `Trans %` = round(100 * mean(lgbtq_trans_bathroom == "Favor", na.rm = TRUE), 1),
    `Mean` = round(mean(lgbtq_support_score, na.rm = TRUE), 3),
    .groups = "drop"
  ) %>%
  select(-generation, -age_group) %>%
  select(group, everything())

gen_age_html <- gen_age_summary %>%
  kable(
    format = "html",
    caption = "LGBTQ Support by Generation and Age: Moral Conservatism Patterns",
    col.names = c("Group", "N", "GL/GLBT\nProtect %", "Trans\nBath %", "Mean\nScore"),
    digits = 1,
    align = c("l", "c", "c", "c", "c")
  ) %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover")) %>%
  column_spec(1, bold = TRUE)

print(gen_age_html)

# ---- TABLE 3: FULL DISTRIBUTION (FAVOR/NEITHER/OPPOSE) ----
cat("\n", strrep("=", 80), "\n")
cat("FULL RESPONSE DISTRIBUTION\n")
cat(strrep("=", 80), "\n\n")

# Function to make full distribution table
make_full_dist <- function(data, var, label) {
  data %>%
    filter(!is.na(.data[[var]])) %>%
    count(generation, .data[[var]]) %>%
    group_by(generation) %>%
    mutate(percent = round(100 * n / sum(n), 1)) %>%
    ungroup() %>%
    pivot_wider(names_from = generation, 
                values_from = c(n, percent),
                names_glue = "{generation}_{.value}",
                values_fill = 0) %>%
    mutate(Question = label, .before = 1) %>%
    rename(Response = 2)
}

full_dist <- bind_rows(
  make_full_dist(naas_lgbtq, "lgbtq_gl_combined", 
                 "GL/GLBT Legal Protections (Combined)"),
  make_full_dist(naas_lgbtq, "lgbtq_trans_bathroom", 
                 "Transgender Bathrooms")
)

full_dist_html <- full_dist %>%
  kable(
    format = "html",
    caption = "Full LGBTQ Response Distribution by Generation",
    col.names = c("Question", "Response", "N", "%", "N", "%"),
    digits = 1
  ) %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover", "condensed")) %>%
  add_header_above(c(" " = 2, "1st Generation" = 2, "2nd Generation" = 2)) %>%
  collapse_rows(columns = 1, valign = "top")

print(full_dist_html)

# ---- STATISTICAL TESTS ----
cat("\n", strrep("=", 80), "\n")
cat("STATISTICAL TESTS: DO GENERATIONS DIFFER?\n")
cat(strrep("=", 80), "\n\n")

# Chi-square tests
test_results <- tibble(
  Question = c("GL/GLBT Protections (Combined)", "Trans Bathrooms", "Mean Support Score"),
  Test = c("Chi-square", "Chi-square", "t-test"),
  Statistic = NA_real_,
  p_value = NA_real_,
  Significant = NA_character_
)

# Test 1: Combined GL/GLBT
test1 <- naas_lgbtq %>%
  filter(!is.na(lgbtq_gl_combined)) %>%
  select(generation, lgbtq_gl_combined) %>%
  table() %>%
  chisq.test()

test_results[1, "Statistic"] <- test1$statistic
test_results[1, "p_value"] <- test1$p.value

# Test 2: Trans
test2 <- naas_lgbtq %>%
  filter(!is.na(lgbtq_trans_bathroom)) %>%
  select(generation, lgbtq_trans_bathroom) %>%
  table() %>%
  chisq.test()

test_results[2, "Statistic"] <- test2$statistic
test_results[2, "p_value"] <- test2$p.value

# Test 3: Mean score t-test
test3 <- t.test(lgbtq_support_score ~ generation, data = naas_lgbtq)

test_results[3, "Statistic"] <- test3$statistic
test_results[3, "p_value"] <- test3$p.value

# Add significance markers
test_results <- test_results %>%
  mutate(
    Significant = case_when(
      p_value < 0.001 ~ "***",
      p_value < 0.01 ~ "**",
      p_value < 0.05 ~ "*",
      p_value < 0.10 ~ "·",
      TRUE ~ "No"
    )
  )

test_html <- test_results %>%
  kable(
    format = "html",
    caption = "Statistical Tests: Generational Differences in LGBTQ Attitudes",
    digits = 4,
    align = c("l", "l", "c", "c", "c")
  ) %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover")) %>%
  column_spec(1, bold = TRUE)

print(test_html)

# ---- VISUALIZATION WITH ERROR BARS: GENERATION + AGE ----
cat("\n", strrep("=", 80), "\n")
cat("CREATING VISUALIZATION WITH ERROR BARS\n")
cat(strrep("=", 80), "\n\n")

# Calculate proportions and standard errors
plot_data <- naas_lgbtq %>%
  mutate(gen_age = paste0(generation, "\n", age_group)) %>%
  group_by(generation, age_group, gen_age) %>%
  summarise(
    # GL/GLBT Combined
    n_gl = sum(!is.na(lgbtq_gl_combined)),
    gl_favor = sum(lgbtq_gl_combined == "Favor", na.rm = TRUE),
    gl_pct = mean(lgbtq_gl_combined == "Favor", na.rm = TRUE) * 100,
    gl_se = sqrt((gl_pct/100) * (1 - gl_pct/100) / n_gl) * 100,
    
    # Trans
    n_trans = sum(!is.na(lgbtq_trans_bathroom)),
    trans_favor = sum(lgbtq_trans_bathroom == "Favor", na.rm = TRUE),
    trans_pct = mean(lgbtq_trans_bathroom == "Favor", na.rm = TRUE) * 100,
    trans_se = sqrt((trans_pct/100) * (1 - trans_pct/100) / n_trans) * 100,
    
    .groups = "drop"
  ) %>%
  pivot_longer(
    cols = c(gl_pct, trans_pct),
    names_to = "Question",
    values_to = "Percent_Favor"
  ) %>%
  mutate(
    SE = case_when(
      Question == "gl_pct" ~ gl_se,
      Question == "trans_pct" ~ trans_se
    ),
    CI_lower = Percent_Favor - 1.96 * SE,
    CI_upper = Percent_Favor + 1.96 * SE,
    Question = recode(Question,
                     "gl_pct" = "Gay/Lesbian/(Trans)\nProtections",
                     "trans_pct" = "Transgender\nBathrooms"),
    gen_age = factor(gen_age, levels = c("1st generation\nUnder 35",
                                          "1st generation\n35 and over",
                                          "2nd generation\nUnder 35",
                                          "2nd generation\n35 and over"))
  )

ggplot(plot_data, aes(x = Question, y = Percent_Favor, fill = gen_age)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.7, alpha = 0.8) +
  geom_errorbar(aes(ymin = CI_lower, ymax = CI_upper),
                position = position_dodge(width = 0.8),
                width = 0.25, linewidth = 0.7, color = "gray20") +
  geom_text(aes(y = CI_upper, label = paste0(round(Percent_Favor, 1), "%")),
            position = position_dodge(width = 0.8),
            vjust = -0.5, hjust = 0.5, size = 3) +
  scale_fill_manual(
    values = c("1st generation\nUnder 35" = "#E74C3C",
               "1st generation\n35 and over" = "#C0392B",
               "2nd generation\nUnder 35" = "#3498DB",
               "2nd generation\n35 and over" = "#2E86C1"),
    name = "Group"
  ) +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 100, 20),
                     expand = expansion(mult = c(0, 0.02))) +
  labs(
    title = "LGBTQ Support: Moral Conservatism by Generation and Age",
    subtitle = "Higher % = More Progressive (Less Morally Conservative) | Error bars show 95% CI",
    x = NULL,
    y = "Percent Favoring (%)",
    caption = "GL/(Trans) combines split-sample questions on gay/lesbian and gay/lesbian/transgender protections\n1st gen = Foreign-born | 2nd gen = US-born with both parents foreign-born"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 10, color = "gray30"),
    plot.caption = element_text(size = 9, hjust = 0, color = "gray50"),
    axis.text.x = element_text(size = 10),
    panel.grid.minor = element_blank()
  )

ggsave("~/Desktop/Thesis/NAAS2016/Fresh Output/LGBTQ_Moral_Conservatism_by_Generation.png",
       width = 11, height = 7, dpi = 300, bg = "white")

# ---- VISUALIZATION WITH ERROR BARS: GENERATION ONLY (NO AGE) ----
cat("\n", strrep("=", 80), "\n")
cat("CREATING VISUALIZATION: GENERATION COMPARISON WITH ERROR BARS\n")
cat(strrep("=", 80), "\n\n")

plot_data_gen <- naas_lgbtq %>%
  group_by(generation) %>%
  summarise(
    # GL/GLBT Combined
    n_gl = sum(!is.na(lgbtq_gl_combined)),
    gl_favor = sum(lgbtq_gl_combined == "Favor", na.rm = TRUE),
    gl_pct = mean(lgbtq_gl_combined == "Favor", na.rm = TRUE) * 100,
    gl_se = sqrt((gl_pct/100) * (1 - gl_pct/100) / n_gl) * 100,
    
    # Trans
    n_trans = sum(!is.na(lgbtq_trans_bathroom)),
    trans_favor = sum(lgbtq_trans_bathroom == "Favor", na.rm = TRUE),
    trans_pct = mean(lgbtq_trans_bathroom == "Favor", na.rm = TRUE) * 100,
    trans_se = sqrt((trans_pct/100) * (1 - trans_pct/100) / n_trans) * 100,
    
    .groups = "drop"
  ) %>%
  pivot_longer(
    cols = c(gl_pct, trans_pct),
    names_to = "Question",
    values_to = "Percent_Favor"
  ) %>%
  mutate(
    SE = case_when(
      Question == "gl_pct" ~ gl_se,
      Question == "trans_pct" ~ trans_se
    ),
    CI_lower = Percent_Favor - 1.96 * SE,
    CI_upper = Percent_Favor + 1.96 * SE,
    Question = recode(Question,
                     "gl_pct" = "Gay/Lesbian/(Trans)\nProtections",
                     "trans_pct" = "Transgender\nBathrooms"),
    Question = factor(Question, levels = c("Gay/Lesbian/(Trans)\nProtections",
                                            "Transgender\nBathrooms"))
  )

ggplot(plot_data_gen, aes(x = Question, y = Percent_Favor, fill = generation)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.7, alpha = 0.8) +
  geom_errorbar(aes(ymin = CI_lower, ymax = CI_upper),
                position = position_dodge(width = 0.8),
                width = 0.25, linewidth = 0.8, color = "gray20") +
  geom_text(aes(y = CI_upper, label = paste0(round(Percent_Favor, 1), "%")),
            position = position_dodge(width = 0.8),
            vjust = -0.5, hjust = 0.5, size = 4, fontface = "bold") +
  scale_fill_manual(
    values = c("1st generation" = "#E74C3C",
               "2nd generation" = "#3498DB"),
    name = "Generation",
    labels = c("1st Generation\n(Foreign-born)", 
               "2nd Generation\n(US-born, both parents FB)")
  ) +
  scale_y_continuous(limits = c(0, 120), breaks = seq(0, 100, 20),
                     expand = expansion(mult = c(0, 0.02))) +
  labs(
    title = "LGBTQ Support: Moral Conservatism by Generation",
    subtitle = "Percent who favor LGBTQ-supportive policies | Higher % = More Progressive (Less Morally Conservative)\nError bars show 95% confidence intervals",
    x = NULL,
    y = "Percent Favoring (%)",
    caption = "Gay/Lesbian/(Trans) combines split-sample questions (Q8_1A & Q8_1B) on gay/lesbian and gay/lesbian/transgender protections\nSource: NAAS 2016 | N = 3,707 (1st gen), 815 (2nd gen)"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    legend.position = "bottom",
    legend.title = element_text(face = "bold", size = 12),
    legend.text = element_text(size = 11),
    plot.title = element_text(face = "bold", size = 16, hjust = 0),
    plot.subtitle = element_text(size = 10, hjust = 0, color = "gray30"),
    plot.caption = element_text(size = 9, hjust = 0, color = "gray50"),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    axis.title.y = element_text(size = 12, face = "bold"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank()
  )

ggsave("~/Desktop/Thesis/NAAS2016/Fresh Output/LGBTQ_by_Generation_Simple.png",
       width = 10, height = 7, dpi = 300, bg = "white")

# ---- SAVE TABLES ----
save_kable(favor_html,
           "~/Desktop/Thesis/NAAS2016/Fresh Output/LGBTQ_Simple_Comparison.html")

save_kable(gen_age_html,
           "~/Desktop/Thesis/NAAS2016/Fresh Output/LGBTQ_by_Generation_Age.html")

save_kable(full_dist_html,
           "~/Desktop/Thesis/NAAS2016/Fresh Output/LGBTQ_Full_Distribution.html")

save_kable(test_html,
           "~/Desktop/Thesis/NAAS2016/Fresh Output/LGBTQ_Statistical_Tests.html")

# Save CSV
write_csv(favor_summary, 
          "~/Desktop/Thesis/NAAS2016/Fresh Output/LGBTQ_Summary_by_Generation.csv")

# ---- SUMMARY STATISTICS ----
cat("\n", strrep("=", 80), "\n")
cat("SUMMARY: MORAL CONSERVATISM FINDINGS\n")
cat(strrep("=", 80), "\n\n")

cat("1ST GENERATION:\n")
favor_summary %>% filter(generation == "1st generation") %>% print()

cat("\n2ND GENERATION:\n")
favor_summary %>% filter(generation == "2nd generation") %>% print()

cat("\n\nGENERATIONAL DIFFERENCE:\n")
diff <- favor_summary %>%
  select(-N) %>%
  pivot_longer(-generation, names_to = "measure", values_to = "value") %>%
  pivot_wider(names_from = generation, values_from = value) %>%
  mutate(Difference = `2nd generation` - `1st generation`) %>%
  select(Measure = measure, `1st Gen` = `1st generation`, 
         `2nd Gen` = `2nd generation`, Difference)

print(diff)

cat("\n\nINTERPRETATION:\n")
cat("─────────────────────────────────────────────────────────────\n")
cat("Positive difference = 2nd gen MORE progressive (LESS morally conservative)\n")
cat("Negative difference = 2nd gen MORE conservative\n")
cat("\nNOTE: Q8_1A (GLBT) and Q8_1B (GL) were asked to different respondents\n")
cat("      (split-sample design). They are now combined using coalesce().\n")

cat("\n✓ LGBTQ moral conservatism analysis complete!\n\n")
cat("Files saved:\n")
cat("  - LGBTQ_Simple_Comparison.html (main table)\n")
cat("  - LGBTQ_by_Generation_Age.html (age breakdown)\n")
cat("  - LGBTQ_Full_Distribution.html (full responses)\n")
cat("  - LGBTQ_Statistical_Tests.html (significance tests)\n")
cat("  - LGBTQ_Moral_Conservatism_by_Generation.png (visualization with age + error bars)\n")
cat("  - LGBTQ_by_Generation_Simple.png (simplified visualization + error bars)\n")
cat("  - LGBTQ_Summary_by_Generation.csv (data)\n")
```
Predicting Moral Conservatism by Generation
```{r}
# ============================================================
# GENERATIONAL DIFFERENCE IN LGBTQ+ SUPPORT
# Focus: 1st gen vs 2nd gen effect across models
# ============================================================

library(tidyverse)
library(broom)
library(kableExtra)
library(stargazer)
library(webshot2)

# ---- PREPARE DATA ----
lgbtq_predict <- naas_lgbtq %>%
  mutate(
    # Outcome: Binary LGBTQ support
    lgbtq_favor = case_when(
      lgbtq_support_score >= 0.67 ~ 1,
      lgbtq_support_score < 0.67 ~ 0,
      TRUE ~ NA_real_
    ),
    
    # Religion categories
    religion_cat = case_when(
      religion == "Christian" ~ "Christian",
      religion == "Non-religious" ~ "Non-religious",
      religion %in% c("Buddhist", "Hindu", "Sikh") ~ "Eastern Religion",
      religion == "Muslim" ~ "Muslim",
      TRUE ~ "Other"
    ),
    
    # Ethnicity
    ethnicity_major = case_when(
      ethnicity %in% c("Chinese", "Filipino", "Indian", "Japanese", 
                       "Korean", "Vietnamese") ~ ethnicity,
      TRUE ~ "Other"
    ),
    
    # Party ID
    party_simple = case_when(
      party == "Democrat" ~ "Democrat",
      party == "Republican" ~ "Republican",
      party %in% c("Independent", "Other party", "No party affiliation") ~ "Independent/None",
      TRUE ~ NA_character_
    )
  ) %>%
  mutate(
    generation = relevel(factor(generation), ref = "2nd generation"),  # CHANGED: 2nd gen now reference
    religion_cat = relevel(factor(religion_cat), ref = "Christian"),
    party_simple = relevel(factor(party_simple), ref = "Independent/None"),
    ethnicity_major = factor(ethnicity_major)
  )

# ---- RUN MODELS ----
cat("\n", strrep("=", 80), "\n")
cat("GENERATIONAL DIFFERENCE IN LGBTQ+ SUPPORT\n")
cat(strrep("=", 80), "\n\n")

# Model 1: Bivariate (no controls)
m1 <- glm(lgbtq_favor ~ generation, 
          data = lgbtq_predict, family = binomial)

# Model 2: + Demographics
m2 <- glm(lgbtq_favor ~ generation + age_35plus + male + college_plus + 
            income_100k_plus + religion_cat,
          data = lgbtq_predict, family = binomial)

# Model 3: + Ethnicity
m3 <- glm(lgbtq_favor ~ generation + age_35plus + male + college_plus + 
            income_100k_plus + religion_cat + ethnicity_major,
          data = lgbtq_predict, family = binomial)

# Model 4: + Party ID (Full model)
m4 <- glm(lgbtq_favor ~ generation + age_35plus + male + college_plus + 
            income_100k_plus + religion_cat + ethnicity_major + party_simple,
          data = lgbtq_predict, family = binomial)

# ---- CREATE OUTPUT DIRECTORY ----
dir.create("~/Desktop/Thesis/NAAS2016/Fresh Output", showWarnings = FALSE, recursive = TRUE)

# ---- TABLE: GENERATION EFFECT ONLY ----
cat("\n=== GENERATION COEFFICIENT ACROSS MODELS ===\n\n")

stargazer(m1, m2, m3, m4,
          type = "html",
          title = "Generation Effect on LGBTQ Support (1st vs 2nd Generation)",
          dep.var.labels = "LGBTQ Support",
          covariate.labels = "1st Generation (vs 2nd)",  # CHANGED LABEL
          column.labels = c("(1)<br>Bivariate", "(2)<br>+Demographics", 
                           "(3)<br>+Ethnicity", "(4)<br>+Party ID"),
          model.numbers = FALSE,
          keep = "generation",  # ONLY SHOW GENERATION COEFFICIENT
          ci = TRUE,
          ci.level = 0.95,
          ci.separator = ", ",
          star.cutoffs = c(0.05, 0.01, 0.001),
          star.char = c("*", "**", "***"),
          notes = c("<i>Notes:</i> Logistic regression coefficients with 95% confidence intervals.",
                    "<b>Model 1:</b> No controls",
                    "<b>Model 2:</b> Controls for age, gender, education, income, religion",
                    "<b>Model 3:</b> Adds ethnicity controls",
                    "<b>Model 4:</b> Adds party identification",
                    "All demographic controls included but not shown. *p<0.05; **p<0.01; ***p<0.001"),
          notes.align = "l",
          notes.append = FALSE,
          omit.stat = c("aic", "ll"),
          add.lines = list(
            c("Demographics", "No", "Yes", "Yes", "Yes"),
            c("Ethnicity", "No", "No", "Yes", "Yes"),
            c("Party ID", "No", "No", "No", "Yes")
          ),
          digits = 3,
          font.size = "small",
          out = "~/Desktop/Thesis/NAAS2016/Fresh Output/Generation_Effect_LGBTQ.html")

# Convert to PNG
webshot("~/Desktop/Thesis/NAAS2016/Fresh Output/Generation_Effect_LGBTQ.html",
        "~/Desktop/Thesis/NAAS2016/Fresh Output/Generation_Effect_LGBTQ.png",
        vwidth = 1000, vheight = 600, zoom = 2)

# Console output
stargazer(m1, m2, m3, m4,
          type = "text",
          title = "Generation Effect on LGBTQ Support",
          column.labels = c("Bivariate", "+Demog", "+Ethnic", "+Party"),
          model.numbers = FALSE,
          keep = "generation",
          ci = TRUE,
          ci.level = 0.95,
          star.cutoffs = c(0.05, 0.01, 0.001),
          omit.stat = c("aic", "ll"),
          add.lines = list(
            c("Demographics", "No", "Yes", "Yes", "Yes"),
            c("Ethnicity", "No", "No", "Yes", "Yes"),
            c("Party ID", "No", "No", "No", "Yes")
          ),
          digits = 3)

# ---- ODDS RATIOS TABLE ----
cat("\n\n", strrep("=", 80), "\n")
cat("GENERATION EFFECT: ODDS RATIOS\n")
cat(strrep("=", 80), "\n\n")

or_table <- bind_rows(
  tidy(m1, conf.int = TRUE) %>% mutate(Model = "Model 1: Bivariate"),
  tidy(m2, conf.int = TRUE) %>% mutate(Model = "Model 2: + Demographics"),
  tidy(m3, conf.int = TRUE) %>% mutate(Model = "Model 3: + Ethnicity"),
  tidy(m4, conf.int = TRUE) %>% mutate(Model = "Model 4: + Party ID")
) %>%
  filter(term == "generation1st generation") %>%  # CHANGED TERM
  mutate(
    OR = exp(estimate),
    OR_lower = exp(conf.low),
    OR_upper = exp(conf.high),
    `95% CI` = paste0("[", round(OR_lower, 2), ", ", round(OR_upper, 2), "]"),
    sig = case_when(
      p.value < 0.001 ~ "***",
      p.value < 0.01 ~ "**",
      p.value < 0.05 ~ "*",
      TRUE ~ ""
    )
  ) %>%
  select(Model, OR, `95% CI`, p.value, sig)

or_html <- or_table %>%
  mutate(OR = round(OR, 2),
         p.value = round(p.value, 4)) %>%
  kable(
    format = "html",
    caption = "<b>1st Generation vs 2nd Generation: Odds of Supporting LGBTQ Policies</b>",  # CHANGED
    col.names = c("Model Specification", "Odds Ratio", "95% CI", "p-value", ""),
    align = c("l", "c", "c", "c", "c")
  ) %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover")) %>%
  column_spec(1, bold = TRUE, width = "15em") %>%
  column_spec(2, bold = TRUE, color = "darkblue") %>%
  row_spec(0, bold = TRUE, background = "#f0f0f0") %>%
  add_footnote(
    label = c(
      "OR < 1 = 1st generation LESS likely to support LGBTQ policies than 2nd generation",  # CHANGED
      "OR > 1 = 1st generation MORE likely to support LGBTQ policies than 2nd generation",  # CHANGED
      "Model 2 controls: age, gender, education, income, religion",
      "Model 3 adds: ethnicity fixed effects",
      "Model 4 adds: party identification",
      "Interpretation: Does generation effect persist after adding controls?"
    ),
    notation = "symbol"
  )

print(or_html)

save_kable(or_html, "~/Desktop/Thesis/NAAS2016/Fresh Output/Generation_OR_LGBTQ.html")

webshot("~/Desktop/Thesis/NAAS2016/Fresh Output/Generation_OR_LGBTQ.html",
        "~/Desktop/Thesis/NAAS2016/Fresh Output/Generation_OR_LGBTQ.png",
        vwidth = 900, vheight = 550, zoom = 2)

# ---- PREDICTED PROBABILITIES: 1ST VS 2ND GEN ----
cat("\n\n", strrep("=", 80), "\n")
cat("PREDICTED PROBABILITIES BY GENERATION\n")
cat(strrep("=", 80), "\n\n")

# Create typical respondent profile
typical_profile <- expand.grid(
  generation = c("1st generation", "2nd generation"),
  age_35plus = TRUE,
  male = FALSE,
  college_plus = TRUE,
  income_100k_plus = FALSE,
  religion_cat = factor("Non-religious", levels = levels(lgbtq_predict$religion_cat)),
  ethnicity_major = factor("Chinese", levels = levels(lgbtq_predict$ethnicity_major)),
  party_simple = factor("Democrat", levels = levels(lgbtq_predict$party_simple))
)

typical_profile$pred_prob <- predict(m4, newdata = typical_profile, type = "response")

pred_summary <- typical_profile %>%
  mutate(
    Generation = as.character(generation),
    `Predicted Probability` = round(pred_prob, 3),
    `Predicted %` = paste0(round(pred_prob * 100, 1), "%")
  ) %>%
  select(Generation, `Predicted Probability`, `Predicted %`)

pred_html <- pred_summary %>%
  kable(
    format = "html",
    caption = "<b>Predicted Probability of LGBTQ Support by Generation</b><br><i>Typical Profile: Female, Age 35+, College degree, Non-religious, Chinese, Democrat, Income <$100k</i>",
    align = c("l", "c", "c")
  ) %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover")) %>%
  column_spec(1, bold = TRUE, width = "10em") %>%
  column_spec(2:3, width = "10em") %>%
  row_spec(0, bold = TRUE, background = "#f0f0f0") %>%
  add_footnote(
    label = c(
      "Based on Model 4 (full controls)",
      paste0("Difference: 1st gen is ", round((pred_summary$`Predicted Probability`[1] - pred_summary$`Predicted Probability`[2]) * 100, 1), " percentage points lower than 2nd gen")
    ),
    notation = "symbol"
  )

print(pred_html)

save_kable(pred_html, "~/Desktop/Thesis/NAAS2016/Fresh Output/Generation_Predictions_LGBTQ.html")

webshot("~/Desktop/Thesis/NAAS2016/Fresh Output/Generation_Predictions_LGBTQ.html",
        "~/Desktop/Thesis/NAAS2016/Fresh Output/Generation_Predictions_LGBTQ.png",
        vwidth = 700, vheight = 400, zoom = 2)

# ---- VISUALIZATION ----
gen_diff <- pred_summary %>%
  mutate(Generation = factor(Generation, levels = c("1st generation", "2nd generation")))

ggplot(gen_diff, aes(x = Generation, y = `Predicted Probability`, fill = Generation)) +
  geom_col(width = 0.6, alpha = 0.9) +
  geom_text(aes(label = `Predicted %`), vjust = -0.5, 
            size = 5, fontface = "bold", color = "black") +
  scale_fill_manual(values = c("1st generation" = "#E74C3C", 
                                "2nd generation" = "#3498DB")) +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.2),
                     labels = scales::percent) +
  labs(
    title = "Predicted LGBTQ Support by Generation",
    subtitle = "Controlling for demographics, religion, ethnicity, and party ID",
    x = NULL,
    y = "Predicted Probability of Support",
    caption = "Typical profile: Female, 35+, College degree, Non-religious, Chinese, Democrat, Income <$100k\nBased on full logistic regression model"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 11, hjust = 0.5, color = "gray30"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text = element_text(size = 11, face = "bold")
  )

ggsave("~/Desktop/Thesis/NAAS2016/Fresh Output/Generation_Comparison_LGBTQ.png",
       width = 8, height = 6, dpi = 300, bg = "white")

# ---- SUMMARY ----
cat("\n\n", strrep("=", 80), "\n")
cat("ANALYSIS COMPLETE\n")
cat(strrep("=", 80), "\n\n")

cat("FOCUS: Generational difference in LGBTQ support (1st vs 2nd generation)\n\n")

cat("KEY FINDING:\n")
cat(strrep("-", 80), "\n")
print(or_table)
cat("\n")

cat("INTERPRETATION:\n")
cat("• Model 1 shows the raw generational difference\n")
cat("• Models 2-4 test if the effect persists after adding controls\n")
cat("• OR < 1 → 1st gen LESS supportive than 2nd gen (more conservative)\n")
cat("• OR > 1 → 1st gen MORE supportive than 2nd gen (less conservative)\n")
cat("• If OR remains significant → generational difference independent of controls\n")
cat("• If OR moves toward 1 → effect explained by demographic differences\n\n")

cat("FILES SAVED:\n")
cat("  1. Generation_Effect_LGBTQ.html/.png - Regression table (generation coef only)\n")
cat("  2. Generation_OR_LGBTQ.html/.png - Odds ratios across models\n")
cat("  3. Generation_Predictions_LGBTQ.html/.png - Predicted probabilities\n")
cat("  4. Generation_Comparison_LGBTQ.png - Visualization\n\n")

cat("✓ Analysis focused exclusively on generational difference\n")
cat("✓ 1st generation coefficient shown (vs 2nd generation as reference)\n")
cat("✓ All demographic controls included but not displayed\n")
cat("✓ Ready for thesis presentation\n\n")
```
Chapter 1: Looking at first-generation LGBTQ vs economic predicting partisanship
```{r}
# ============================================================
# REPUBLICAN IDENTIFICATION: 1ST GENERATION EFFECT
# Focus: Does LGBTQ attitude explain generational difference?
# ============================================================

library(tidyverse)
library(broom)
library(stargazer)
library(webshot2)

# ---- PREPARE COMBINED DATA ----
partisan_policy <- NAAS2016 %>%
  mutate(across(everything(), haven::as_factor)) %>%
  mutate(
    # Generation
    generation = case_when(
      S9 == "(2) Another country" ~ "1st generation",
      S9 == "(1) United States" & Q1_1 == "(2) No" & Q1_2 == "(2) No" ~ "2nd generation",
      TRUE ~ NA_character_
    ),
    
    # DEPENDENT VARIABLE: Republican (vs non-Republican)
    republican = case_when(
      Q10_0A == "(2) Republican" ~ 1,
      Q10_0A %in% c("(1) Democrat", "(3) Independent", 
                    "(4) In terms of some other party",
                    "(5) Do not think in terms of political parties") ~ 0,
      TRUE ~ NA_real_
    ),
    
    # KEY INDEPENDENT VARIABLE: LGBTQ Support (Moral Progressivism)
    lgbtq_support = rowMeans(
      cbind(
        case_when(Q8_1A %in% c("(4) Favor", "(5) Strongly favor") ~ 1,
                  Q8_1A %in% c("(1) Strongly oppose", "(2) Oppose") ~ 0, TRUE ~ NA_real_),
        case_when(Q8_1B %in% c("(4) Favor", "(5) Strongly favor") ~ 1,
                  Q8_1B %in% c("(1) Strongly oppose", "(2) Oppose") ~ 0, TRUE ~ NA_real_),
        case_when(Q8_1C %in% c("(4) Favor", "(5) Strongly favor") ~ 1,
                  Q8_1C %in% c("(1) Strongly oppose", "(2) Oppose") ~ 0, TRUE ~ NA_real_)
      ), na.rm = TRUE
    ),
    
    # Controls
    age_35plus = AGE2 == "(2) 35 or above",
    male = S7 == "(1) Male",
    college_plus = S8 %in% c("(5) College degree or Bachelor's degree", 
                             "(6) Graduate or Professional degree"),
    income_100k_plus = Q10_15 %in% c("(5) $100,000 to $125,000",
                                      "(6) $125,000 to $250,000",
                                      "(7) $250,000 and over"),
    
    religion_cat = case_when(
      Q10_21 %in% c("(03) Baptist", "(05) Catholic", "(06) Christian",
                    "(12) Episcopalian, Anglican", "(19) Lutheran", "(20) Methodist",
                    "(21) Mormon, Church of the Latter Day Saints", "(24) Pentecostal",
                    "(25) Presbyterian", "(26) Protestant (no denomination given)",
                    "(28) Seventh Day Adventist") ~ "Christian",
      Q10_21 %in% c("(04) Buddhist", "(15) Hindu", "(29) Sikh") ~ "Eastern Religion",
      Q10_21 %in% c("(01) Agnostic", "(02) Atheist", "(77) No religion",
                    "(78) Spiritual, but not religious") ~ "Non-religious",
      TRUE ~ "Other"
    ),
    
    ethnicity_major = case_when(
      RETHNICS == "(03) a Chinese" ~ "Chinese",
      RETHNICS == "(04) a Filipino" ~ "Filipino",
      RETHNICS == "(06) an Indian" ~ "Indian",
      RETHNICS == "(07) a Japanese" ~ "Japanese",
      RETHNICS == "(08) a Korean" ~ "Korean",
      RETHNICS == "(11) a Vietnamese" ~ "Vietnamese",
      TRUE ~ "Other"
    )
  ) %>%
  filter(generation %in% c("1st generation", "2nd generation")) %>%
  mutate(
    generation = relevel(factor(generation), ref = "2nd generation"),  # 2nd gen as reference
    religion_cat = relevel(factor(religion_cat), ref = "Christian"),
    ethnicity_major = factor(ethnicity_major)
  )

# ---- CHECK SAMPLE SIZES ----
cat("\n", strrep("=", 80), "\n")
cat("SAMPLE SIZES\n")
cat(strrep("=", 80), "\n\n")

partisan_policy %>%
  group_by(generation) %>%
  summarise(
    N_total = n(),
    N_republican = sum(republican == 1, na.rm = TRUE),
    N_non_republican = sum(republican == 0, na.rm = TRUE),
    Pct_republican = round(100 * mean(republican, na.rm = TRUE), 1),
    N_lgbtq = sum(!is.na(lgbtq_support)),
    N_complete = sum(!is.na(lgbtq_support) & !is.na(republican)),
    .groups = "drop"
  ) %>% print()

# ---- POOLED MODELS (1ST GEN VS 2ND GEN) ----
cat("\n", strrep("=", 80), "\n")
cat("POOLED MODELS: 1ST GENERATION EFFECT\n")
cat(strrep("=", 80), "\n\n")

# Model 1: Generation only (bivariate)
m1_gen <- glm(republican ~ generation, data = partisan_policy, family = binomial)

# Model 2: Generation + Demographics ONLY (no LGBTQ)
m2_gen_demog <- glm(republican ~ generation + age_35plus + male + 
                      college_plus + income_100k_plus + religion_cat + ethnicity_major,
                    data = partisan_policy, family = binomial)

# Model 3: Generation + Demographics + LGBTQ (full model)
m3_gen_lgbtq <- glm(republican ~ generation + lgbtq_support + age_35plus + male + 
                      college_plus + income_100k_plus + religion_cat + ethnicity_major,
                    data = partisan_policy, family = binomial)

# ---- CREATE OUTPUT DIRECTORY ----
dir.create("~/Desktop/Thesis/NAAS2016/Fresh Output", showWarnings = FALSE, recursive = TRUE)

# ---- STARGAZER TABLE: 1ST GENERATION EFFECT WITH LGBTQ ----
cat("\n=== TABLE: 1ST GENERATION EFFECT ON REPUBLICAN ID ===\n\n")

stargazer(m1_gen, m2_gen_demog, m3_gen_lgbtq,
          type = "html",
          title = "1st Generation Effect on Republican ID: Role of LGBTQ Attitudes",
          dep.var.labels = "Republican Identification",
          covariate.labels = c(
            "1st Generation (vs 2nd)",
            "LGBTQ Support"
          ),
          column.labels = c("(1)<br>Bivariate", 
                           "(2)<br>+ Demographics",
                           "(3)<br>+ LGBTQ"),
          model.numbers = FALSE,
          keep = c("generation", "lgbtq_support"),  # ONLY SHOW GENERATION AND LGBTQ
          ci = TRUE,
          ci.level = 0.95,
          ci.separator = ", ",
          star.cutoffs = c(0.05, 0.01, 0.001),
          star.char = c("*", "**", "***"),
          add.lines = list(
            c("Demographics", "No", "Yes", "Yes"),
            c("Religion", "No", "Yes", "Yes"),
            c("Ethnicity", "No", "Yes", "Yes")
          ),
          notes = c("<i>Notes:</i> Logistic regression coefficients with 95% confidence intervals.",
                    "Dependent variable: Republican = 1, Non-Republican (Democrat/Independent/None) = 0.",
                    "Model 1: Raw generational difference (no controls).",
                    "Model 2: Generational difference controlling for demographics only.",
                    "Model 3: Does LGBTQ attitude explain remaining generational difference?",
                    "Demographics = age, gender, education, income (controlled but not shown).",
                    "*p<0.05; **p<0.01; ***p<0.001"),
          notes.align = "l",
          notes.append = FALSE,
          omit.stat = c("aic", "ll"),
          digits = 3,
          font.size = "small",
          out = "~/Desktop/Thesis/NAAS2016/Fresh Output/Table_1stGen_Effect_Republican_LGBTQ.html")

webshot("~/Desktop/Thesis/NAAS2016/Fresh Output/Table_1stGen_Effect_Republican_LGBTQ.html",
        "~/Desktop/Thesis/NAAS2016/Fresh Output/Table_1stGen_Effect_Republican_LGBTQ.png",
        vwidth = 1100, vheight = 650, zoom = 2)

# Text version
stargazer(m1_gen, m2_gen_demog, m3_gen_lgbtq,
          type = "text",
          title = "1st Generation Effect on Republican ID",
          column.labels = c("Bivariate", "+Demog", "+LGBTQ"),
          model.numbers = FALSE,
          keep = c("generation", "lgbtq_support"),
          ci = TRUE,
          star.cutoffs = c(0.05, 0.01, 0.001),
          add.lines = list(
            c("Demographics", "No", "Yes", "Yes"),
            c("Religion", "No", "Yes", "Yes"),
            c("Ethnicity", "No", "Yes", "Yes")
          ),
          omit.stat = c("aic", "ll"),
          digits = 3)

# ---- SUMMARY ----
cat("\n\n", strrep("=", 80), "\n")
cat("ANALYSIS COMPLETE\n")
cat(strrep("=", 80), "\n\n")

cat("RESEARCH QUESTION:\n")
cat("Does the 1st generation effect on Republican ID persist after controlling for\n")
cat("LGBTQ attitudes (beyond demographic controls)?\n\n")

cat("INTERPRETATION GUIDE:\n")
cat("─────────────────────────────────────────────────────────────\n")
cat("Model 1 (Bivariate): Raw generational difference\n")
cat("  → Are 1st gen more/less Republican than 2nd gen?\n\n")

cat("Model 2 (+ Demographics): Generation effect after demographic controls\n")
cat("  → Is the difference just due to age/gender/education/income/religion/ethnicity?\n\n")

cat("Model 3 (+ LGBTQ): Does LGBTQ attitude explain what remains?\n")
cat("  → Compare Model 2 vs Model 3 generation coefficients:\n")
cat("    • If 1st gen coefficient DECREASES → LGBTQ attitudes help explain difference\n")
cat("    • If 1st gen coefficient UNCHANGED → LGBTQ attitudes don't explain difference\n")
cat("    • If 1st gen becomes non-significant → LGBTQ fully explains difference\n\n")

cat("LGBTQ Support coefficient (Model 3):\n")
cat("  • Negative coefficient → Pro-LGBTQ attitudes DECREASE Republican ID\n")
cat("  • Shows independent effect of LGBTQ attitudes on partisanship\n\n")

cat("✓ Analysis complete!\n\n")
cat("FILE SAVED:\n")
cat("  - Table_1stGen_Effect_Republican_LGBTQ.html/.png\n\n")
```
Chapter 1: Democratic Identification Version of LGBTQ+ Attitudes
```{r}
# ============================================================
# DEMOCRATIC IDENTIFICATION: 1ST GENERATION EFFECT
# Focus: Do LGBTQ and economic attitudes explain generational difference?
# ============================================================

library(tidyverse)
library(broom)
library(stargazer)
library(webshot2)

# ---- PREPARE COMBINED DATA ----
partisan_policy <- NAAS2016 %>%
  mutate(across(everything(), haven::as_factor)) %>%
  mutate(
    # Generation
    generation = case_when(
      S9 == "(2) Another country" ~ "1st generation",
      S9 == "(1) United States" & Q1_1 == "(2) No" & Q1_2 == "(2) No" ~ "2nd generation",
      TRUE ~ NA_character_
    ),
    
    # DEPENDENT VARIABLE: Democrat (vs non-Democrat)
    democrat = case_when(
      Q10_0A == "(1) Democrat" ~ 1,
      Q10_0A %in% c("(2) Republican", "(3) Independent", 
                    "(4) In terms of some other party",
                    "(5) Do not think in terms of political parties") ~ 0,
      TRUE ~ NA_real_
    ),
    
    # KEY INDEPENDENT VARIABLES:
    
    # 1. LGBTQ Support (Moral Progressivism)
    lgbtq_support = rowMeans(
      cbind(
        case_when(Q8_1A %in% c("(4) Favor", "(5) Strongly favor") ~ 1,
                  Q8_1A %in% c("(1) Strongly oppose", "(2) Oppose") ~ 0, TRUE ~ NA_real_),
        case_when(Q8_1B %in% c("(4) Favor", "(5) Strongly favor") ~ 1,
                  Q8_1B %in% c("(1) Strongly oppose", "(2) Oppose") ~ 0, TRUE ~ NA_real_),
        case_when(Q8_1C %in% c("(4) Favor", "(5) Strongly favor") ~ 1,
                  Q8_1C %in% c("(1) Strongly oppose", "(2) Oppose") ~ 0, TRUE ~ NA_real_)
      ), na.rm = TRUE
    ),
    
    # 2. Economic Progressivism (Q3_6_A, C, D, F, G)
    econ_progressivism = rowMeans(
      cbind(
        # A: Reduce Income Differences
        case_when(Q3_6_A %in% c("(1) Strongly agree", "(2) Somewhat agree") ~ 1,
                  Q3_6_A %in% c("(4) Somewhat disagree", "(5) Strongly disagree") ~ 0, TRUE ~ NA_real_),
        # C: Increase minimum wage
        case_when(Q3_6_C %in% c("(1) Strongly agree", "(2) Somewhat agree") ~ 1,
                  Q3_6_C %in% c("(4) Somewhat disagree", "(5) Strongly disagree") ~ 0, TRUE ~ NA_real_),
        # D: Tax millionaires more
        case_when(Q3_6_D %in% c("(1) Strongly agree", "(2) Somewhat agree") ~ 1,
                  Q3_6_D %in% c("(4) Somewhat disagree", "(5) Strongly disagree") ~ 0, TRUE ~ NA_real_),
        # F: Discourage foreign workers (REVERSE)
        case_when(Q3_6_F %in% c("(1) Strongly agree", "(2) Somewhat agree") ~ 0,
                  Q3_6_F %in% c("(4) Somewhat disagree", "(5) Strongly disagree") ~ 1, TRUE ~ NA_real_),
        # G: Spend more on college
        case_when(Q3_6_G %in% c("(1) Strongly agree", "(2) Somewhat agree") ~ 1,
                  Q3_6_G %in% c("(4) Somewhat disagree", "(5) Strongly disagree") ~ 0, TRUE ~ NA_real_)
      ), na.rm = TRUE
    ),
    
    # Controls
    age_35plus = AGE2 == "(2) 35 or above",
    male = S7 == "(1) Male",
    college_plus = S8 %in% c("(5) College degree or Bachelor's degree", 
                             "(6) Graduate or Professional degree"),
    income_100k_plus = Q10_15 %in% c("(5) $100,000 to $125,000",
                                      "(6) $125,000 to $250,000",
                                      "(7) $250,000 and over"),
    
    religion_cat = case_when(
      Q10_21 %in% c("(03) Baptist", "(05) Catholic", "(06) Christian",
                    "(12) Episcopalian, Anglican", "(19) Lutheran", "(20) Methodist",
                    "(21) Mormon, Church of the Latter Day Saints", "(24) Pentecostal",
                    "(25) Presbyterian", "(26) Protestant (no denomination given)",
                    "(28) Seventh Day Adventist") ~ "Christian",
      Q10_21 %in% c("(04) Buddhist", "(15) Hindu", "(29) Sikh") ~ "Eastern Religion",
      Q10_21 %in% c("(01) Agnostic", "(02) Atheist", "(77) No religion",
                    "(78) Spiritual, but not religious") ~ "Non-religious",
      TRUE ~ "Other"
    ),
    
    ethnicity_major = case_when(
      RETHNICS == "(03) a Chinese" ~ "Chinese",
      RETHNICS == "(04) a Filipino" ~ "Filipino",
      RETHNICS == "(06) an Indian" ~ "Indian",
      RETHNICS == "(07) a Japanese" ~ "Japanese",
      RETHNICS == "(08) a Korean" ~ "Korean",
      RETHNICS == "(11) a Vietnamese" ~ "Vietnamese",
      TRUE ~ "Other"
    )
  ) %>%
  filter(generation %in% c("1st generation", "2nd generation")) %>%
  mutate(
    generation = relevel(factor(generation), ref = "2nd generation"),  # 2nd gen as reference
    religion_cat = relevel(factor(religion_cat), ref = "Christian"),
    ethnicity_major = factor(ethnicity_major)
  )

# ---- CHECK SAMPLE SIZES ----
cat("\n", strrep("=", 80), "\n")
cat("SAMPLE SIZES\n")
cat(strrep("=", 80), "\n\n")

partisan_policy %>%
  group_by(generation) %>%
  summarise(
    N_total = n(),
    N_democrat = sum(democrat == 1, na.rm = TRUE),
    N_non_democrat = sum(democrat == 0, na.rm = TRUE),
    Pct_democrat = round(100 * mean(democrat, na.rm = TRUE), 1),
    N_lgbtq = sum(!is.na(lgbtq_support)),
    N_econ = sum(!is.na(econ_progressivism)),
    N_complete = sum(!is.na(lgbtq_support) & !is.na(econ_progressivism) & !is.na(democrat)),
    .groups = "drop"
  ) %>% print()

# ---- POOLED MODELS (1ST GEN VS 2ND GEN) ----
cat("\n", strrep("=", 80), "\n")
cat("POOLED MODELS: 1ST GENERATION EFFECT\n")
cat(strrep("=", 80), "\n\n")

# Model 1: Generation only (bivariate)
m1_gen <- glm(democrat ~ generation, data = partisan_policy, family = binomial)

# Model 2: Generation + Demographics ONLY (no policy attitudes)
m2_gen_demog <- glm(democrat ~ generation + age_35plus + male + 
                      college_plus + income_100k_plus + religion_cat + ethnicity_major,
                    data = partisan_policy, family = binomial)

# Model 3: Generation + Demographics + LGBTQ
m3_gen_lgbtq <- glm(democrat ~ generation + lgbtq_support + age_35plus + male + 
                      college_plus + income_100k_plus + religion_cat + ethnicity_major,
                    data = partisan_policy, family = binomial)

# Model 4: Generation + Demographics + Economic
m4_gen_econ <- glm(democrat ~ generation + econ_progressivism + age_35plus + male + 
                     college_plus + income_100k_plus + religion_cat + ethnicity_major,
                   data = partisan_policy, family = binomial)

# Model 5: Generation + Demographics + Both policies
m5_gen_both <- glm(democrat ~ generation + lgbtq_support + econ_progressivism + 
                     age_35plus + male + college_plus + income_100k_plus + 
                     religion_cat + ethnicity_major,
                   data = partisan_policy, family = binomial)

# ---- CREATE OUTPUT DIRECTORY ----
dir.create("~/Desktop/Thesis/NAAS2016/Fresh Output", showWarnings = FALSE, recursive = TRUE)

# ---- STARGAZER TABLE: 1ST GENERATION EFFECT WITH POLICIES ----
cat("\n=== TABLE: 1ST GENERATION EFFECT ON DEMOCRATIC ID ===\n\n")

stargazer(m1_gen, m2_gen_demog, m3_gen_lgbtq, m4_gen_econ, m5_gen_both,
          type = "html",
          title = "1st Generation Effect on Democratic ID: Role of LGBTQ and Economic Attitudes",
          dep.var.labels = "Democratic Identification",
          covariate.labels = c(
            "1st Generation (vs 2nd)",
            "LGBTQ Support",
            "Economic Progressivism"
          ),
          column.labels = c("(1)<br>Bivariate", 
                           "(2)<br>+ Demographics",
                           "(3)<br>+ LGBTQ",
                           "(4)<br>+ Economic",
                           "(5)<br>+ Both"),
          model.numbers = FALSE,
          keep = c("generation", "lgbtq_support", "econ_progressivism"),  # ONLY SHOW KEY VARIABLES
          ci = TRUE,
          ci.level = 0.95,
          ci.separator = ", ",
          star.cutoffs = c(0.05, 0.01, 0.001),
          star.char = c("*", "**", "***"),
          add.lines = list(
            c("Demographics", "No", "Yes", "Yes", "Yes", "Yes"),
            c("Religion", "No", "Yes", "Yes", "Yes", "Yes"),
            c("Ethnicity", "No", "Yes", "Yes", "Yes", "Yes")
          ),
          notes = c("<i>Notes:</i> Logistic regression coefficients with 95% confidence intervals.",
                    "Dependent variable: Democrat = 1, Non-Democrat (Republican/Independent/Other/None) = 0.",
                    "Model 1: Raw generational difference (no controls).",
                    "Model 2: Generational difference controlling for demographics only.",
                    "Models 3-5: Do policy attitudes explain remaining generational difference?",
                    "Economic Progressivism = reduce inequality, minimum wage, tax rich, foreign workers (R), college spending.",
                    "Demographics = age, gender, education, income (controlled but not shown).",
                    "*p<0.05; **p<0.01; ***p<0.001"),
          notes.align = "l",
          notes.append = FALSE,
          omit.stat = c("aic", "ll"),
          digits = 3,
          font.size = "small",
          out = "~/Desktop/Thesis/NAAS2016/Fresh Output/Table_1stGen_Effect_Democrat_Policies.html")

webshot("~/Desktop/Thesis/NAAS2016/Fresh Output/Table_1stGen_Effect_Democrat_Policies.html",
        "~/Desktop/Thesis/NAAS2016/Fresh Output/Table_1stGen_Effect_Democrat_Policies.png",
        vwidth = 1300, vheight = 700, zoom = 2)

# Text version
stargazer(m1_gen, m2_gen_demog, m3_gen_lgbtq, m4_gen_econ, m5_gen_both,
          type = "text",
          title = "1st Generation Effect on Democratic ID",
          column.labels = c("Bivar", "+Demog", "+LGBTQ", "+Econ", "+Both"),
          model.numbers = FALSE,
          keep = c("generation", "lgbtq_support", "econ_progressivism"),
          ci = TRUE,
          star.cutoffs = c(0.05, 0.01, 0.001),
          add.lines = list(
            c("Demographics", "No", "Yes", "Yes", "Yes", "Yes"),
            c("Religion", "No", "Yes", "Yes", "Yes", "Yes"),
            c("Ethnicity", "No", "Yes", "Yes", "Yes", "Yes")
          ),
          omit.stat = c("aic", "ll"),
          digits = 3)

# ---- SUMMARY ----
cat("\n\n", strrep("=", 80), "\n")
cat("ANALYSIS COMPLETE\n")
cat(strrep("=", 80), "\n\n")

cat("RESEARCH QUESTION:\n")
cat("Does the 1st generation effect on Democratic ID persist after controlling for\n")
cat("LGBTQ and economic policy attitudes (beyond demographic controls)?\n\n")

cat("INTERPRETATION GUIDE:\n")
cat("─────────────────────────────────────────────────────────────\n")
cat("Model 1 (Bivariate): Raw generational difference\n")
cat("  → Are 1st gen more/less Democratic than 2nd gen?\n\n")

cat("Model 2 (+ Demographics): Generation effect after demographic controls\n")
cat("  → Is the difference just due to age/gender/education/income/religion/ethnicity?\n\n")

cat("Model 3 (+ LGBTQ): Does LGBTQ attitude explain what remains?\n")
cat("  → Compare Model 2 vs Model 3 generation coefficients\n\n")

cat("Model 4 (+ Economic): Does economic attitude explain what remains?\n")
cat("  → Compare Model 2 vs Model 4 generation coefficients\n\n")

cat("Model 5 (+ Both): Do both policies together explain the difference?\n")
cat("  → Compare Model 2 vs Model 5 generation coefficients\n")
cat("  → Which policy has a stronger independent effect?\n\n")

cat("EXPECTED SIGNS:\n")
cat("  • LGBTQ Support: POSITIVE (pro-LGBTQ → more Democratic)\n")
cat("  • Economic Progressivism: POSITIVE (pro-redistribution → more Democratic)\n\n")

cat("KEY COMPARISON:\n")
cat("  → If 1st gen coefficient changes from Model 2 to Models 3-5:\n")
cat("    Policy attitudes help explain the generational difference\n")
cat("  → In Model 5, compare LGBTQ vs Economic coefficient sizes:\n")
cat("    Which policy matters more for Democratic ID?\n\n")

cat("✓ Analysis complete!\n\n")
cat("FILE SAVED:\n")
cat("  - Table_1stGen_Effect_Democrat_Policies.html/.png\n\n")
```

```{r}
# ============================================================
# PARTISANSHIP: LGBTQ VS ECONOMIC PROGRESSIVISM
# Comparing policy effects by generation with stargazer
# ============================================================

library(tidyverse)
library(broom)
library(stargazer)
library(webshot2)
library(kableExtra)

# ---- PREPARE COMBINED DATA ----
partisan_policy <- NAAS2016 %>%
  mutate(across(everything(), haven::as_factor)) %>%
  mutate(
    # Generation
    generation = case_when(
      S9 == "(2) Another country" ~ "1st generation",
      S9 == "(1) United States" & Q1_1 == "(2) No" & Q1_2 == "(2) No" ~ "2nd generation",
      TRUE ~ NA_character_
    ),
    
    # DEPENDENT VARIABLE: Democrat (vs non-Democrat)
    democrat = case_when(
      Q10_0A == "(1) Democrat" ~ 1,
      Q10_0A %in% c("(2) Republican", "(3) Independent", 
                    "(4) In terms of some other party",
                    "(5) Do not think in terms of political parties") ~ 0,
      TRUE ~ NA_real_
    ),
    
    # KEY INDEPENDENT VARIABLES:
    
    # 1. LGBTQ Support (Moral Progressivism)
    lgbtq_support = rowMeans(
      cbind(
        case_when(Q8_1A %in% c("(4) Favor", "(5) Strongly favor") ~ 1,
                  Q8_1A %in% c("(1) Strongly oppose", "(2) Oppose") ~ 0, TRUE ~ NA_real_),
        case_when(Q8_1B %in% c("(4) Favor", "(5) Strongly favor") ~ 1,
                  Q8_1B %in% c("(1) Strongly oppose", "(2) Oppose") ~ 0, TRUE ~ NA_real_),
        case_when(Q8_1C %in% c("(4) Favor", "(5) Strongly favor") ~ 1,
                  Q8_1C %in% c("(1) Strongly oppose", "(2) Oppose") ~ 0, TRUE ~ NA_real_)
      ), na.rm = TRUE
    ),
    
    # 2. Economic Progressivism (Q3_6_B, C, D, F, G)
    econ_progressivism = rowMeans(
      cbind(
        # B: Regulate banks more strictly
        case_when(Q3_6_B %in% c("(1) Strongly agree", "(2) Somewhat agree") ~ 1,
                  Q3_6_B %in% c("(4) Somewhat disagree", "(5) Strongly disagree") ~ 0, TRUE ~ NA_real_),
        # C: Increase minimum wage
        case_when(Q3_6_C %in% c("(1) Strongly agree", "(2) Somewhat agree") ~ 1,
                  Q3_6_C %in% c("(4) Somewhat disagree", "(5) Strongly disagree") ~ 0, TRUE ~ NA_real_),
        # D: Tax millionaires more
        case_when(Q3_6_D %in% c("(1) Strongly agree", "(2) Somewhat agree") ~ 1,
                  Q3_6_D %in% c("(4) Somewhat disagree", "(5) Strongly disagree") ~ 0, TRUE ~ NA_real_),
        # F: Discourage foreign workers (REVERSE)
        case_when(Q3_6_F %in% c("(1) Strongly agree", "(2) Somewhat agree") ~ 0,
                  Q3_6_F %in% c("(4) Somewhat disagree", "(5) Strongly disagree") ~ 1, TRUE ~ NA_real_),
        # G: Spend more on college
        case_when(Q3_6_G %in% c("(1) Strongly agree", "(2) Somewhat agree") ~ 1,
                  Q3_6_G %in% c("(4) Somewhat disagree", "(5) Strongly disagree") ~ 0, TRUE ~ NA_real_)
      ), na.rm = TRUE
    ),
    
    # Controls
    age_35plus = AGE2 == "(2) 35 or above",
    male = S7 == "(1) Male",
    college_plus = S8 %in% c("(5) College degree or Bachelor's degree", 
                             "(6) Graduate or Professional degree"),
    income_100k_plus = Q10_15 %in% c("(5) $100,000 to $125,000", 
                                      "(6) $125,000 to $250,000", 
                                      "(7) $250,000 and over"),
    
    religion_cat = case_when(
      Q10_21 %in% c("(03) Baptist", "(05) Catholic", "(06) Christian",
                    "(12) Episcopalian, Anglican", "(19) Lutheran", "(20) Methodist",
                    "(21) Mormon, Church of the Latter Day Saints", "(24) Pentecostal",
                    "(25) Presbyterian", "(26) Protestant (no denomination given)",
                    "(28) Seventh Day Adventist") ~ "Christian",
      Q10_21 %in% c("(04) Buddhist", "(15) Hindu", "(29) Sikh") ~ "Eastern Religion",
      Q10_21 %in% c("(01) Agnostic", "(02) Atheist", "(77) No religion",
                    "(78) Spiritual, but not religious") ~ "Non-religious",
      TRUE ~ "Other"
    ),
    
    ethnicity_major = case_when(
      RETHNICS == "(03) a Chinese" ~ "Chinese",
      RETHNICS == "(04) a Filipino" ~ "Filipino",
      RETHNICS == "(06) an Indian" ~ "Indian",
      RETHNICS == "(07) a Japanese" ~ "Japanese",
      RETHNICS == "(08) a Korean" ~ "Korean",
      RETHNICS == "(11) a Vietnamese" ~ "Vietnamese",
      TRUE ~ "Other"
    )
  ) %>%
  filter(generation %in% c("1st generation", "2nd generation")) %>%
  mutate(
    generation = relevel(factor(generation), ref = "1st generation"),
    religion_cat = relevel(factor(religion_cat), ref = "Christian"),
    ethnicity_major = factor(ethnicity_major)
  )

# ---- CHECK SAMPLE SIZES ----
cat("\n", strrep("=", 80), "\n")
cat("SAMPLE SIZES\n")
cat(strrep("=", 80), "\n\n")

partisan_policy %>%
  group_by(generation) %>%
  summarise(
    N_total = n(),
    N_democrat = sum(!is.na(democrat)),
    N_lgbtq = sum(!is.na(lgbtq_support)),
    N_econ = sum(!is.na(econ_progressivism)),
    N_both = sum(!is.na(lgbtq_support) & !is.na(econ_progressivism) & !is.na(democrat)),
    .groups = "drop"
  ) %>% print()

# ---- 1ST GENERATION MODELS ----
cat("\n", strrep("=", 80), "\n")
cat("1ST GENERATION MODELS\n")
cat(strrep("=", 80), "\n\n")

data_1st <- partisan_policy %>% filter(generation == "1st generation")

# Model 1a: LGBTQ bivariate
m1a <- glm(democrat ~ lgbtq_support, data = data_1st, family = binomial)

# Model 1b: LGBTQ controlled
m1b <- glm(democrat ~ lgbtq_support + age_35plus + male + college_plus + 
             income_100k_plus + religion_cat + ethnicity_major,
           data = data_1st, family = binomial)

# Model 1c: Economic bivariate
m1c <- glm(democrat ~ econ_progressivism, data = data_1st, family = binomial)

# Model 1d: Economic controlled
m1d <- glm(democrat ~ econ_progressivism + age_35plus + male + college_plus + 
             income_100k_plus + religion_cat + ethnicity_major,
           data = data_1st, family = binomial)

# Model 1e: Both LGBTQ + Economic (horse race)
m1e <- glm(democrat ~ lgbtq_support + econ_progressivism + age_35plus + male + 
             college_plus + income_100k_plus + religion_cat + ethnicity_major,
           data = data_1st, family = binomial)

# ---- 2ND GENERATION MODELS ----
cat("\n", strrep("=", 80), "\n")
cat("2ND GENERATION MODELS\n")
cat(strrep("=", 80), "\n\n")

data_2nd <- partisan_policy %>% filter(generation == "2nd generation")

# Model 2a: LGBTQ bivariate
m2a <- glm(democrat ~ lgbtq_support, data = data_2nd, family = binomial)

# Model 2b: LGBTQ controlled
m2b <- glm(democrat ~ lgbtq_support + age_35plus + male + college_plus + 
             income_100k_plus + religion_cat + ethnicity_major,
           data = data_2nd, family = binomial)

# Model 2c: Economic bivariate
m2c <- glm(democrat ~ econ_progressivism, data = data_2nd, family = binomial)

# Model 2d: Economic controlled
m2d <- glm(democrat ~ econ_progressivism + age_35plus + male + college_plus + 
             income_100k_plus + religion_cat + ethnicity_major,
           data = data_2nd, family = binomial)

# Model 2e: Both LGBTQ + Economic (horse race)
m2e <- glm(democrat ~ lgbtq_support + econ_progressivism + age_35plus + male + 
             college_plus + income_100k_plus + religion_cat + ethnicity_major,
           data = data_2nd, family = binomial)

# ---- STARGAZER TABLE 1: 1ST GENERATION ----
cat("\n=== TABLE 1: 1ST GENERATION - LGBTQ VS ECONOMIC ===\n\n")

dir.create("~/Desktop/Thesis/NAAS2016/Fresh Output", showWarnings = FALSE, recursive = TRUE)

stargazer(m1a, m1b, m1c, m1d, m1e,
          type = "html",
          title = "1st Generation: LGBTQ vs Economic Progressivism Predicting Democratic ID",
          dep.var.labels = "Democratic Identification",
          covariate.labels = c(
            "LGBTQ Support",
            "Economic Progressivism",
            "Age 35+",
            "Male",
            "College Degree+",
            "Income ≥$100k",
            "Eastern Religion",
            "Non-religious",
            "Other Religion",
            "Chinese",
            "Filipino",
            "Indian",
            "Japanese",
            "Korean",
            "Vietnamese",
            "Other Ethnicity"
          ),
          column.labels = c("(1)<br>LGBTQ<br>Bivariate", 
                           "(2)<br>LGBTQ<br>Controlled",
                           "(3)<br>Economic<br>Bivariate",
                           "(4)<br>Economic<br>Controlled",
                           "(5)<br>Both<br>Policies"),
          model.numbers = FALSE,
          ci = TRUE,
          ci.level = 0.95,
          ci.separator = ", ",
          star.cutoffs = c(0.05, 0.01, 0.001),
          star.char = c("*", "**", "***"),
          notes = c("<i>Notes:</i> 1st Generation (Foreign-born) only. Logistic regression coefficients with 95% CI.",
                    "Dependent variable: Democrat = 1, Non-Democrat = 0.",
                    "Model 5 includes both policies to test which matters more when competing.",
                    "Reference: Under 35, Female, No college, Income <$100k, Christian, Other ethnicity.",
                    "*p<0.05; **p<0.01; ***p<0.001"),
          notes.align = "l",
          notes.append = FALSE,
          omit.stat = c("aic", "ll"),
          digits = 3,
          font.size = "small",
          out = "~/Desktop/Thesis/NAAS2016/Fresh Output/Table_1stGen_LGBTQ_vs_Economic.html")

webshot("~/Desktop/Thesis/NAAS2016/Fresh Output/Table_1stGen_LGBTQ_vs_Economic.html",
        "~/Desktop/Thesis/NAAS2016/Fresh Output/Table_1stGen_LGBTQ_vs_Economic.png",
        vwidth = 1400, vheight = 1400, zoom = 2)

# Text version
stargazer(m1a, m1b, m1c, m1d, m1e,
          type = "text",
          title = "1st Generation: LGBTQ vs Economic",
          column.labels = c("LGBTQ-Biv", "LGBTQ-Ctrl", "Econ-Biv", "Econ-Ctrl", "Both"),
          model.numbers = FALSE,
          ci = TRUE,
          star.cutoffs = c(0.05, 0.01, 0.001),
          omit.stat = c("aic", "ll"),
          digits = 3)

# ---- STARGAZER TABLE 2: 2ND GENERATION ----
cat("\n=== TABLE 2: 2ND GENERATION - LGBTQ VS ECONOMIC ===\n\n")

stargazer(m2a, m2b, m2c, m2d, m2e,
          type = "html",
          title = "2nd Generation: LGBTQ vs Economic Progressivism Predicting Democratic ID",
          dep.var.labels = "Democratic Identification",
          covariate.labels = c(
            "LGBTQ Support",
            "Economic Progressivism",
            "Age 35+",
            "Male",
            "College Degree+",
            "Income ≥$100k",
            "Eastern Religion",
            "Non-religious",
            "Other Religion",
            "Chinese",
            "Filipino",
            "Indian",
            "Japanese",
            "Korean",
            "Vietnamese",
            "Other Ethnicity"
          ),
          column.labels = c("(1)<br>LGBTQ<br>Bivariate", 
                           "(2)<br>LGBTQ<br>Controlled",
                           "(3)<br>Economic<br>Bivariate",
                           "(4)<br>Economic<br>Controlled",
                           "(5)<br>Both<br>Policies"),
          model.numbers = FALSE,
          ci = TRUE,
          ci.level = 0.95,
          ci.separator = ", ",
          star.cutoffs = c(0.05, 0.01, 0.001),
          star.char = c("*", "**", "***"),
          notes = c("<i>Notes:</i> 2nd Generation (US-born, both parents foreign-born) only. Logistic regression with 95% CI.",
                    "Dependent variable: Democrat = 1, Non-Democrat = 0.",
                    "Model 5 includes both policies to test independent effects when competing.",
                    "Reference: Under 35, Female, No college, Income <$100k, Christian, Other ethnicity.",
                    "*p<0.05; **p<0.01; ***p<0.001"),
          notes.align = "l",
          notes.append = FALSE,
          omit.stat = c("aic", "ll"),
          digits = 3,
          font.size = "small",
          out = "~/Desktop/Thesis/NAAS2016/Fresh Output/Table_2ndGen_LGBTQ_vs_Economic.html")

webshot("~/Desktop/Thesis/NAAS2016/Fresh Output/Table_2ndGen_LGBTQ_vs_Economic.html",
        "~/Desktop/Thesis/NAAS2016/Fresh Output/Table_2ndGen_LGBTQ_vs_Economic.png",
        vwidth = 1400, vheight = 1400, zoom = 2)

# Text version
stargazer(m2a, m2b, m2c, m2d, m2e,
          type = "text",
          title = "2nd Generation: LGBTQ vs Economic",
          column.labels = c("LGBTQ-Biv", "LGBTQ-Ctrl", "Econ-Biv", "Econ-Ctrl", "Both"),
          model.numbers = FALSE,
          ci = TRUE,
          star.cutoffs = c(0.05, 0.01, 0.001),
          omit.stat = c("aic", "ll"),
          digits = 3)

# ---- COMBINED TABLE: KEY COMPARISONS ----
cat("\n=== TABLE 3: SIDE-BY-SIDE GENERATION COMPARISON ===\n\n")

stargazer(m1b, m1d, m1e, m2b, m2d, m2e,
          type = "html",
          title = "Policy Effects on Democratic ID: 1st vs 2nd Generation Comparison",
          dep.var.labels = "Democratic Identification",
          covariate.labels = c(
            "LGBTQ Support",
            "Economic Progressivism",
            "Age 35+",
            "Male",
            "College Degree+",
            "Income ≥$100k",
            "Eastern Religion",
            "Non-religious",
            "Other Religion",
            "Chinese",
            "Filipino",
            "Indian",
            "Japanese",
            "Korean",
            "Vietnamese",
            "Other Ethnicity"
          ),
          column.labels = c("(1)<br>LGBTQ", "(2)<br>Economic", "(3)<br>Both",
                           "(4)<br>LGBTQ", "(5)<br>Economic", "(6)<br>Both"),
          model.numbers = FALSE,
          add.lines = list(c("Generation", "1st Gen", "1st Gen", "1st Gen", 
                            "2nd Gen", "2nd Gen", "2nd Gen")),
          ci = TRUE,
          ci.level = 0.95,
          ci.separator = ", ",
          star.cutoffs = c(0.05, 0.01, 0.001),
          star.char = c("*", "**", "***"),
          notes = c("<i>Notes:</i> All models include full demographic controls. 95% confidence intervals.",
                    "Models 1-3: 1st Generation (Foreign-born). Models 4-6: 2nd Generation (US-born, both parents FB).",
                    "Column 3 and 6: Both policies together to test independent effects.",
                    "*p<0.05; **p<0.01; ***p<0.001"),
          notes.align = "l",
          notes.append = FALSE,
          omit.stat = c("aic", "ll"),
          digits = 3,
          font.size = "small",
          column.sep.width = "2pt",
          out = "~/Desktop/Thesis/NAAS2016/Fresh Output/Table_Combined_1st_2nd_Comparison.html")

webshot("~/Desktop/Thesis/NAAS2016/Fresh Output/Table_Combined_1st_2nd_Comparison.html",
        "~/Desktop/Thesis/NAAS2016/Fresh Output/Table_Combined_1st_2nd_Comparison.png",
        vwidth = 1600, vheight = 1400, zoom = 2)

# ---- ODDS RATIOS COMPARISON TABLE ----
cat("\n=== ODDS RATIOS COMPARISON ===\n\n")

or_comparison <- bind_rows(
  # 1st gen
  tidy(m1b, conf.int = TRUE) %>% filter(term == "lgbtq_support") %>% 
    mutate(Generation = "1st Gen", Policy = "LGBTQ", Model = "Controlled"),
  tidy(m1d, conf.int = TRUE) %>% filter(term == "econ_progressivism") %>% 
    mutate(Generation = "1st Gen", Policy = "Economic", Model = "Controlled"),
  tidy(m1e, conf.int = TRUE) %>% filter(term == "lgbtq_support") %>% 
    mutate(Generation = "1st Gen", Policy = "LGBTQ", Model = "Both"),
  tidy(m1e, conf.int = TRUE) %>% filter(term == "econ_progressivism") %>% 
    mutate(Generation = "1st Gen", Policy = "Economic", Model = "Both"),
  
  # 2nd gen
  tidy(m2b, conf.int = TRUE) %>% filter(term == "lgbtq_support") %>% 
    mutate(Generation = "2nd Gen", Policy = "LGBTQ", Model = "Controlled"),
  tidy(m2d, conf.int = TRUE) %>% filter(term == "econ_progressivism") %>% 
    mutate(Generation = "2nd Gen", Policy = "Economic", Model = "Controlled"),
  tidy(m2e, conf.int = TRUE) %>% filter(term == "lgbtq_support") %>% 
    mutate(Generation = "2nd Gen", Policy = "LGBTQ", Model = "Both"),
  tidy(m2e, conf.int = TRUE) %>% filter(term == "econ_progressivism") %>% 
    mutate(Generation = "2nd Gen", Policy = "Economic", Model = "Both")
) %>%
  mutate(
    OR = exp(estimate),
    OR_lower = exp(conf.low),
    OR_upper = exp(conf.high),
    `95% CI` = paste0("[", round(OR_lower, 2), ", ", round(OR_upper, 2), "]"),
    sig = case_when(
      p.value < 0.001 ~ "***",
      p.value < 0.01 ~ "**",
      p.value < 0.05 ~ "*",
      TRUE ~ ""
    )
  ) %>%
  select(Generation, Policy, Model, OR, `95% CI`, p.value, sig)

or_html <- or_comparison %>%
  mutate(OR = round(OR, 3), p.value = round(p.value, 4)) %>%
  kable(
    format = "html",
    caption = "LGBTQ vs Economic Progressivism: Odds Ratios Comparison",
    align = c("l", "l", "l", "c", "c", "c", "c")
  ) %>%
  kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover", "condensed")) %>%
  pack_rows("1st Generation", 1, 4) %>%
  pack_rows("2nd Generation", 5, 8) %>%
  add_footnote(
    label = c(
      "OR > 1: Progressive attitudes increase odds of Democratic identification",
      "'Controlled' models: Policy alone with demographics",
      "'Both' models: LGBTQ and Economic competing in same model",
      "Compare OR sizes to see which policy is stronger predictor"
    ),
    notation = "symbol"
  )

print(or_html)

save_kable(or_html,
           "~/Desktop/Thesis/NAAS2016/Fresh Output/OR_LGBTQ_vs_Economic_Comparison.html")

webshot("~/Desktop/Thesis/NAAS2016/Fresh Output/OR_LGBTQ_vs_Economic_Comparison.html",
        "~/Desktop/Thesis/NAAS2016/Fresh Output/OR_LGBTQ_vs_Economic_Comparison.png",
        vwidth = 1200, vheight = 700, zoom = 2)

# ---- VISUALIZATION: EFFECT SIZE COMPARISON ----
cat("\n=== CREATING VISUALIZATION ===\n\n")

# Make sure OR_lower and OR_upper are in the data for plotting
plot_data <- bind_rows(
  # 1st gen
  tidy(m1e, conf.int = TRUE) %>% filter(term == "lgbtq_support") %>% 
    mutate(Generation = "1st Gen", Policy = "LGBTQ"),
  tidy(m1e, conf.int = TRUE) %>% filter(term == "econ_progressivism") %>% 
    mutate(Generation = "1st Gen", Policy = "Economic"),
  
  # 2nd gen
  tidy(m2e, conf.int = TRUE) %>% filter(term == "lgbtq_support") %>% 
    mutate(Generation = "2nd Gen", Policy = "LGBTQ"),
  tidy(m2e, conf.int = TRUE) %>% filter(term == "econ_progressivism") %>% 
    mutate(Generation = "2nd Gen", Policy = "Economic")
) %>%
  mutate(
    OR = exp(estimate),
    OR_lower = exp(conf.low),
    OR_upper = exp(conf.high),
    sig = case_when(
      p.value < 0.001 ~ "***",
      p.value < 0.01 ~ "**",
      p.value < 0.05 ~ "*",
      TRUE ~ ""
    ),
    Generation = factor(Generation, levels = c("1st Gen", "2nd Gen")),
    Policy = factor(Policy, levels = c("LGBTQ", "Economic"))
  )

ggplot(plot_data, aes(x = Policy, y = OR, fill = Generation)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.7) +
  geom_errorbar(aes(ymin = OR_lower, ymax = OR_upper),
                position = position_dodge(width = 0.8),
                width = 0.3, linewidth = 1) +
  geom_hline(yintercept = 1, linetype = "dashed", color = "gray50") +
  geom_text(aes(label = paste0("OR=", round(OR, 2), sig)),
            position = position_dodge(width = 0.8),
            vjust = -0.5, size = 4, fontface = "bold") +
  scale_fill_manual(values = c("1st Gen" = "#E74C3C", "2nd Gen" = "#3498DB")) +
  scale_y_continuous(limits = c(0, max(plot_data$OR_upper) * 1.2)) +
  labs(
    title = "LGBTQ vs Economic Progressivism: Predicting Democratic ID",
    subtitle = "Odds ratios from models with both policies competing (with 95% CI)",
    x = "Policy Attitude",
    y = "Odds Ratio (OR > 1 = More likely Democrat)",
    fill = "Generation",
    caption = "Both policies included in same model with demographic controls"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(face = "bold", size = 15),
    panel.grid.minor = element_blank()
  )

ggsave("~/Desktop/Thesis/NAAS2016/Fresh Output/LGBTQ_vs_Economic_Comparison_Plot.png",
       width = 10, height = 7, dpi = 300, bg = "white")

# ---- SUMMARY TABLE: WINNER BY GENERATION ----
cat("\n", strrep("=", 80), "\n")
cat("WHICH POLICY MATTERS MORE? (From 'Both' Models)\n")
cat(strrep("=", 80), "\n\n")

winner_table <- or_comparison %>%
  filter(Model == "Both") %>%
  select(Generation, Policy, OR, `95% CI`, p.value, sig) %>%
  arrange(Generation, desc(OR))

print(winner_table)

cat("\n\nINTERPRETATION:\n")
cat("─────────────────────────────────────────────────────────────\n")
cat("• Within each generation, policy with higher OR matters MORE\n")
cat("• Compare 1st vs 2nd gen to see if priorities shift\n")
cat("• Look at significance: Is one policy robust while other isn't?\n\n")

cat("✓ Analysis complete!\n\n")
cat("Files saved:\n")
cat("  1. Table_1stGen_LGBTQ_vs_Economic.html/.png\n")
cat("     → 5 models for 1st generation\n\n")
cat("  2. Table_2ndGen_LGBTQ_vs_Economic.html/.png\n")
cat("     → 5 models for 2nd generation\n\n")
cat("  3. Table_Combined_1st_2nd_Comparison.html/.png\n")
cat("     → Side-by-side comparison across generations\n\n")
cat("  4. OR_LGBTQ_vs_Economic_Comparison.html/.png\n")
cat("     → Clean odds ratios summary\n\n")
cat("  5. LGBTQ_vs_Economic_Comparison_Plot.png\n")
cat("     → Visual comparison of effect sizes\n\n")

cat("KEY QUESTIONS ANSWERED:\n")
cat("  • Which policy matters MORE for Democratic ID?\n")
cat("  • Does this differ by generation?\n")
cat("  • Do effects persist when policies compete in same model?\n")
cat("  • Are effects robust to demographic controls?\n")
```

Chapter 2: Linked Fate comparison between first and second gen
```{r}
# ============================================================
# GROUP CONSCIOUSNESS (LINKED FATE): 2ND VS 1ST GENERATION
# Similar to LGBTQ support analysis
# ============================================================

library(tidyverse)
library(broom)
library(stargazer)
library(webshot2)

# ---- PREPARE DATA ----
linked_fate_data <- NAAS2016 %>%
  mutate(across(everything(), haven::as_factor)) %>%
  mutate(
    # Generation
    generation = case_when(
      S9 == "(2) Another country" ~ "1st generation",
      S9 == "(1) United States" & Q1_1 == "(2) No" & Q1_2 == "(2) No" ~ "2nd generation",
      TRUE ~ NA_character_
    ),
    
    # DEPENDENT VARIABLE: Group Consciousness (Linked Fate)
    group_consciousness = case_when(
      Q4_3 == "(1) Yes" & Q4_3A == "(1) A lot" ~ 1,
      Q4_3 == "(1) Yes" & Q4_3A == "(2) Some" ~ 0.67,
      Q4_3 == "(1) Yes" & Q4_3A == "(3) Not very much" ~ 0.33,
      Q4_3 == "(2) No" ~ 0,
      TRUE ~ NA_real_
    ),
    
    # Controls
    age_35plus = AGE2 == "(2) 35 or above",
    male = S7 == "(1) Male",
    college_plus = S8 %in% c("(5) College degree or Bachelor's degree", 
                             "(6) Graduate or Professional degree"),
    income_100k_plus = Q10_15 %in% c("(5) $100,000 to $125,000",
                                      "(6) $125,000 to $250,000",
                                      "(7) $250,000 and over"),
    
    religion_cat = case_when(
      Q10_21 %in% c("(03) Baptist", "(05) Catholic", "(06) Christian",
                    "(12) Episcopalian, Anglican", "(19) Lutheran", "(20) Methodist",
                    "(21) Mormon, Church of the Latter Day Saints", "(24) Pentecostal",
                    "(25) Presbyterian", "(26) Protestant (no denomination given)",
                    "(28) Seventh Day Adventist") ~ "Christian",
      Q10_21 %in% c("(04) Buddhist", "(15) Hindu", "(29) Sikh") ~ "Eastern Religion",
      Q10_21 %in% c("(01) Agnostic", "(02) Atheist", "(77) No religion",
                    "(78) Spiritual, but not religious") ~ "Non-religious",
      TRUE ~ "Other"
    ),
    
    ethnicity_major = case_when(
      RETHNICS == "(03) a Chinese" ~ "Chinese",
      RETHNICS == "(04) a Filipino" ~ "Filipino",
      RETHNICS == "(06) an Indian" ~ "Indian",
      RETHNICS == "(07) a Japanese" ~ "Japanese",
      RETHNICS == "(08) a Korean" ~ "Korean",
      RETHNICS == "(11) a Vietnamese" ~ "Vietnamese",
      TRUE ~ "Other"
    )
  ) %>%
  filter(generation %in% c("1st generation", "2nd generation")) %>%
  mutate(
    generation = relevel(factor(generation), ref = "1st generation"),  # 1st gen as reference
    religion_cat = relevel(factor(religion_cat), ref = "Christian"),
    ethnicity_major = factor(ethnicity_major)
  )

# ---- CHECK SAMPLE SIZES ----
cat("\n", strrep("=", 80), "\n")
cat("SAMPLE SIZES\n")
cat(strrep("=", 80), "\n\n")

linked_fate_data %>%
  group_by(generation) %>%
  summarise(
    N_total = n(),
    N_with_data = sum(!is.na(group_consciousness)),
    Mean_linked_fate = round(mean(group_consciousness, na.rm = TRUE), 3),
    SD_linked_fate = round(sd(group_consciousness, na.rm = TRUE), 3),
    .groups = "drop"
  ) %>% print()

# ---- RUN MODELS ----
cat("\n", strrep("=", 80), "\n")
cat("GENERATIONAL DIFFERENCE IN LINKED FATE\n")
cat(strrep("=", 80), "\n\n")

# Model 1: Bivariate (no controls)
m1 <- lm(group_consciousness ~ generation, data = linked_fate_data)

# Model 2: + Demographics
m2 <- lm(group_consciousness ~ generation + age_35plus + male + college_plus + 
           income_100k_plus + religion_cat,
         data = linked_fate_data)

# Model 3: + Ethnicity
m3 <- lm(group_consciousness ~ generation + age_35plus + male + college_plus + 
           income_100k_plus + religion_cat + ethnicity_major,
         data = linked_fate_data)

# ---- CREATE OUTPUT DIRECTORY ----
dir.create("~/Desktop/Thesis/NAAS2016/Fresh Output", showWarnings = FALSE, recursive = TRUE)

# ---- STARGAZER TABLE: GENERATION EFFECT ONLY ----
cat("\n=== GENERATION COEFFICIENT ACROSS MODELS ===\n\n")

stargazer(m1, m2, m3,
          type = "html",
          title = "Generation Effect on Linked Fate/Group Consciousness (2nd vs 1st Generation)",
          dep.var.labels = "Linked Fate (0-1 scale)",
          covariate.labels = "2nd Generation (vs 1st)",
          column.labels = c("(1)<br>Bivariate", "(2)<br>+Demographics", "(3)<br>+Ethnicity"),
          model.numbers = FALSE,
          keep = "generation",  # ONLY SHOW GENERATION COEFFICIENT
          ci = TRUE,
          ci.level = 0.95,
          ci.separator = ", ",
          star.cutoffs = c(0.05, 0.01, 0.001),
          star.char = c("*", "**", "***"),
          notes = c("<i>Notes:</i> OLS regression coefficients with 95% confidence intervals.",
                    "<b>Model 1:</b> No controls",
                    "<b>Model 2:</b> Controls for age, gender, education, income, religion",
                    "<b>Model 3:</b> Adds ethnicity controls",
                    "Linked Fate: 0 = No linked fate, 0.33 = Not very much, 0.67 = Some, 1 = A lot",
                    "All demographic controls included but not shown. *p<0.05; **p<0.01; ***p<0.001"),
          notes.align = "l",
          notes.append = FALSE,
          add.lines = list(
            c("Demographics", "No", "Yes", "Yes"),
            c("Religion", "No", "Yes", "Yes"),
            c("Ethnicity", "No", "No", "Yes")
          ),
          omit.stat = c("aic", "ll"),
          digits = 3,
          font.size = "small",
          out = "~/Desktop/Thesis/NAAS2016/Fresh Output/Generation_Effect_LinkedFate.html")

# Convert to PNG
webshot("~/Desktop/Thesis/NAAS2016/Fresh Output/Generation_Effect_LinkedFate.html",
        "~/Desktop/Thesis/NAAS2016/Fresh Output/Generation_Effect_LinkedFate.png",
        vwidth = 1000, vheight = 600, zoom = 2)

# Console output
stargazer(m1, m2, m3,
          type = "text",
          title = "Generation Effect on Linked Fate",
          column.labels = c("Bivariate", "+Demog", "+Ethnic"),
          model.numbers = FALSE,
          keep = "generation",
          ci = TRUE,
          ci.level = 0.95,
          star.cutoffs = c(0.05, 0.01, 0.001),
          omit.stat = c("aic", "ll"),
          add.lines = list(
            c("Demographics", "No", "Yes", "Yes"),
            c("Religion", "No", "Yes", "Yes"),
            c("Ethnicity", "No", "No", "Yes")
          ),
          digits = 3)

# ---- PREDICTIONS FOR VISUALIZATION ----
cat("\n\n", strrep("=", 80), "\n")
cat("PREDICTED LINKED FATE BY GENERATION\n")
cat(strrep("=", 80), "\n\n")

# Create typical respondent profile
typical_profile <- expand.grid(
  generation = c("1st generation", "2nd generation"),
  age_35plus = TRUE,
  male = FALSE,
  college_plus = TRUE,
  income_100k_plus = FALSE,
  religion_cat = factor("Non-religious", levels = levels(linked_fate_data$religion_cat)),
  ethnicity_major = factor("Chinese", levels = levels(linked_fate_data$ethnicity_major))
)

# Get predictions with standard errors from Model 3 (full controls)
predictions <- predict(m3, newdata = typical_profile, se.fit = TRUE)

pred_summary <- typical_profile %>%
  mutate(
    Generation = as.character(generation),
    predicted = predictions$fit,
    se = predictions$se.fit,
    ci_lower = predicted - 1.96 * se,
    ci_upper = predicted + 1.96 * se
  ) %>%
  select(Generation, predicted, se, ci_lower, ci_upper)

print(pred_summary)

# ---- VISUALIZATION: BAR CHART WITH ERROR BARS ----
cat("\n=== CREATING VISUALIZATION ===\n\n")

plot_data <- pred_summary %>%
  mutate(Generation = factor(Generation, levels = c("1st generation", "2nd generation")))

ggplot(plot_data, aes(x = Generation, y = predicted, fill = Generation)) +
  geom_col(width = 0.6, alpha = 0.9) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper),
                width = 0.2, linewidth = 1, color = "black") +
  geom_text(aes(label = round(predicted, 3)), vjust = -2.5, 
            size = 5, fontface = "bold", color = "black") +
  scale_fill_manual(values = c("1st generation" = "#E74C3C", 
                                "2nd generation" = "#3498DB")) +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.2),
                     labels = c("0\n(No linked fate)", "0.2", "0.4", "0.6", "0.8", 
                               "1.0\n(Strong linked fate)")) +
  labs(
    title = "Linked Fate by Generation",
    subtitle = "Controlling for demographics, religion, and ethnicity (with 95% CI)",
    x = NULL,
    y = "Predicted Linked Fate Score",
    caption = "Typical profile: Female, 35+, College degree, Non-religious, Chinese, Income <$100k\nBased on full OLS regression model with all controls\nError bars show 95% confidence intervals"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 11, hjust = 0.5, color = "gray30"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text = element_text(size = 11),
    axis.text.x = element_text(face = "bold", size = 12)
  )

ggsave("~/Desktop/Thesis/NAAS2016/Fresh Output/Generation_LinkedFate_Comparison.png",
       width = 8, height = 6, dpi = 300, bg = "white")

# ---- SUMMARY TABLE ----
cat("\n\n", strrep("=", 80), "\n")
cat("SUMMARY: GENERATIONAL DIFFERENCE IN LINKED FATE\n")
cat(strrep("=", 80), "\n\n")

# Extract coefficients from all models
coef_summary <- bind_rows(
  tidy(m1, conf.int = TRUE) %>% mutate(Model = "Model 1: Bivariate"),
  tidy(m2, conf.int = TRUE) %>% mutate(Model = "Model 2: + Demographics"),
  tidy(m3, conf.int = TRUE) %>% mutate(Model = "Model 3: + Ethnicity")
) %>%
  filter(term == "generation2nd generation") %>%
  mutate(
    sig = case_when(
      p.value < 0.001 ~ "***",
      p.value < 0.01 ~ "**",
      p.value < 0.05 ~ "*",
      TRUE ~ ""
    )
  ) %>%
  select(Model, estimate, conf.low, conf.high, p.value, sig)

print(coef_summary)

cat("\n\nINTERPRETATION:\n")
cat("─────────────────────────────────────────────────────────────\n")
cat("• Positive coefficient = 2nd gen has HIGHER linked fate than 1st gen\n")
cat("• Negative coefficient = 2nd gen has LOWER linked fate than 1st gen\n")
cat("• Linked fate scale: 0 (none) to 1 (a lot)\n")
cat("• Compare coefficients across models to see if effect persists\n\n")

cat("✓ Analysis complete!\n\n")
cat("Files saved:\n")
cat("  1. Generation_Effect_LinkedFate.html/.png - Regression table\n")
cat("  2. Generation_LinkedFate_Comparison.png - Bar chart with error bars\n\n")

cat("KEY FINDING:\n")
cat("─────────────────────────────────────────────────────────────\n")
cat(sprintf("2nd generation linked fate is %.3f points different from 1st generation\n", 
            coef_summary$estimate[3]))
cat(sprintf("(controlling for all demographics and ethnicity)\n"))
cat(sprintf("95%% CI: [%.3f, %.3f] %s\n", 
            coef_summary$conf.low[3], coef_summary$conf.high[3], coef_summary$sig[3]))
cat("\n")
```
Chapter 2: Ethnic ID Salience comparison between generations
```{r}
# ============================================================
# ETHNIC IDENTITY IMPORTANCE: 2ND VS 1ST GENERATION
# Similar to LGBTQ support and linked fate analysis
# ============================================================

library(tidyverse)
library(broom)
library(stargazer)
library(webshot2)

# ---- PREPARE DATA ----
ethnic_identity_data <- NAAS2016 %>%
  mutate(across(everything(), haven::as_factor)) %>%
  mutate(
    # Generation
    generation = case_when(
      S9 == "(2) Another country" ~ "1st generation",
      S9 == "(1) United States" & Q1_1 == "(2) No" & Q1_2 == "(2) No" ~ "2nd generation",
      TRUE ~ NA_character_
    ),
    
    # DEPENDENT VARIABLE: Ethnic Identity Importance
    ethnic_identity_importance = case_when(
      Q4_2A == "(1) Not at all important" ~ 0,
      Q4_2A == "(2) Somewhat important" ~ 0.33,
      Q4_2A == "(3) Very important" ~ 0.67,
      Q4_2A == "(4) Extremely important" ~ 1,
      TRUE ~ NA_real_
    ),
    
    # Controls
    age_35plus = AGE2 == "(2) 35 or above",
    male = S7 == "(1) Male",
    college_plus = S8 %in% c("(5) College degree or Bachelor's degree", 
                             "(6) Graduate or Professional degree"),
    income_100k_plus = Q10_15 %in% c("(5) $100,000 to $125,000",
                                      "(6) $125,000 to $250,000",
                                      "(7) $250,000 and over"),
    
    religion_cat = case_when(
      Q10_21 %in% c("(03) Baptist", "(05) Catholic", "(06) Christian",
                    "(12) Episcopalian, Anglican", "(19) Lutheran", "(20) Methodist",
                    "(21) Mormon, Church of the Latter Day Saints", "(24) Pentecostal",
                    "(25) Presbyterian", "(26) Protestant (no denomination given)",
                    "(28) Seventh Day Adventist") ~ "Christian",
      Q10_21 %in% c("(04) Buddhist", "(15) Hindu", "(29) Sikh") ~ "Eastern Religion",
      Q10_21 %in% c("(01) Agnostic", "(02) Atheist", "(77) No religion",
                    "(78) Spiritual, but not religious") ~ "Non-religious",
      TRUE ~ "Other"
    ),
    
    ethnicity_major = case_when(
      RETHNICS == "(03) a Chinese" ~ "Chinese",
      RETHNICS == "(04) a Filipino" ~ "Filipino",
      RETHNICS == "(06) an Indian" ~ "Indian",
      RETHNICS == "(07) a Japanese" ~ "Japanese",
      RETHNICS == "(08) a Korean" ~ "Korean",
      RETHNICS == "(11) a Vietnamese" ~ "Vietnamese",
      TRUE ~ "Other"
    )
  ) %>%
  filter(generation %in% c("1st generation", "2nd generation")) %>%
  mutate(
    generation = relevel(factor(generation), ref = "1st generation"),  # 1st gen as reference
    religion_cat = relevel(factor(religion_cat), ref = "Christian"),
    ethnicity_major = factor(ethnicity_major)
  )

# ---- CHECK SAMPLE SIZES ----
cat("\n", strrep("=", 80), "\n")
cat("SAMPLE SIZES\n")
cat(strrep("=", 80), "\n\n")

ethnic_identity_data %>%
  group_by(generation) %>%
  summarise(
    N_total = n(),
    N_with_data = sum(!is.na(ethnic_identity_importance)),
    Mean_ethnic_identity = round(mean(ethnic_identity_importance, na.rm = TRUE), 3),
    SD_ethnic_identity = round(sd(ethnic_identity_importance, na.rm = TRUE), 3),
    .groups = "drop"
  ) %>% print()

# ---- RUN MODELS ----
cat("\n", strrep("=", 80), "\n")
cat("GENERATIONAL DIFFERENCE IN ETHNIC IDENTITY IMPORTANCE\n")
cat(strrep("=", 80), "\n\n")

# Model 1: Bivariate (no controls)
m1 <- lm(ethnic_identity_importance ~ generation, data = ethnic_identity_data)

# Model 2: + Demographics
m2 <- lm(ethnic_identity_importance ~ generation + age_35plus + male + college_plus + 
           income_100k_plus + religion_cat,
         data = ethnic_identity_data)

# Model 3: + Ethnicity
m3 <- lm(ethnic_identity_importance ~ generation + age_35plus + male + college_plus + 
           income_100k_plus + religion_cat + ethnicity_major,
         data = ethnic_identity_data)

# ---- CREATE OUTPUT DIRECTORY ----
dir.create("~/Desktop/Thesis/NAAS2016/Fresh Output", showWarnings = FALSE, recursive = TRUE)

# ---- STARGAZER TABLE: GENERATION EFFECT ONLY ----
cat("\n=== GENERATION COEFFICIENT ACROSS MODELS ===\n\n")

stargazer(m1, m2, m3,
          type = "html",
          title = "Generation Effect on Ethnic Identity Importance (2nd vs 1st Generation)",
          dep.var.labels = "Ethnic Identity Importance (0-1 scale)",
          covariate.labels = "2nd Generation (vs 1st)",
          column.labels = c("(1)<br>Bivariate", "(2)<br>+Demographics", "(3)<br>+Ethnicity"),
          model.numbers = FALSE,
          keep = "generation",  # ONLY SHOW GENERATION COEFFICIENT
          ci = TRUE,
          ci.level = 0.95,
          ci.separator = ", ",
          star.cutoffs = c(0.05, 0.01, 0.001),
          star.char = c("*", "**", "***"),
          notes = c("<i>Notes:</i> OLS regression coefficients with 95% confidence intervals.",
                    "<b>Model 1:</b> No controls",
                    "<b>Model 2:</b> Controls for age, gender, education, income, religion",
                    "<b>Model 3:</b> Adds ethnicity controls",
                    "Ethnic Identity: 0 = Not at all important, 0.33 = Somewhat important, 0.67 = Very important, 1 = Extremely important",
                    "All demographic controls included but not shown. *p<0.05; **p<0.01; ***p<0.001"),
          notes.align = "l",
          notes.append = FALSE,
          add.lines = list(
            c("Demographics", "No", "Yes", "Yes"),
            c("Religion", "No", "Yes", "Yes"),
            c("Ethnicity", "No", "No", "Yes")
          ),
          omit.stat = c("aic", "ll"),
          digits = 3,
          font.size = "small",
          out = "~/Desktop/Thesis/NAAS2016/Fresh Output/Generation_Effect_EthnicIdentity.html")

# Convert to PNG
webshot("~/Desktop/Thesis/NAAS2016/Fresh Output/Generation_Effect_EthnicIdentity.html",
        "~/Desktop/Thesis/NAAS2016/Fresh Output/Generation_Effect_EthnicIdentity.png",
        vwidth = 1000, vheight = 600, zoom = 2)

# Console output
stargazer(m1, m2, m3,
          type = "text",
          title = "Generation Effect on Ethnic Identity Importance",
          column.labels = c("Bivariate", "+Demog", "+Ethnic"),
          model.numbers = FALSE,
          keep = "generation",
          ci = TRUE,
          ci.level = 0.95,
          star.cutoffs = c(0.05, 0.01, 0.001),
          omit.stat = c("aic", "ll"),
          add.lines = list(
            c("Demographics", "No", "Yes", "Yes"),
            c("Religion", "No", "Yes", "Yes"),
            c("Ethnicity", "No", "No", "Yes")
          ),
          digits = 3)

# ---- PREDICTIONS FOR VISUALIZATION ----
cat("\n\n", strrep("=", 80), "\n")
cat("PREDICTED ETHNIC IDENTITY IMPORTANCE BY GENERATION\n")
cat(strrep("=", 80), "\n\n")

# Create typical respondent profile
typical_profile <- expand.grid(
  generation = c("1st generation", "2nd generation"),
  age_35plus = TRUE,
  male = FALSE,
  college_plus = TRUE,
  income_100k_plus = FALSE,
  religion_cat = factor("Non-religious", levels = levels(ethnic_identity_data$religion_cat)),
  ethnicity_major = factor("Chinese", levels = levels(ethnic_identity_data$ethnicity_major))
)

# Get predictions with standard errors from Model 3 (full controls)
predictions <- predict(m3, newdata = typical_profile, se.fit = TRUE)

pred_summary <- typical_profile %>%
  mutate(
    Generation = as.character(generation),
    predicted = predictions$fit,
    se = predictions$se.fit,
    ci_lower = predicted - 1.96 * se,
    ci_upper = predicted + 1.96 * se
  ) %>%
  select(Generation, predicted, se, ci_lower, ci_upper)

print(pred_summary)

# ---- VISUALIZATION: BAR CHART WITH ERROR BARS ----
cat("\n=== CREATING VISUALIZATION ===\n\n")

plot_data <- pred_summary %>%
  mutate(Generation = factor(Generation, levels = c("1st generation", "2nd generation")))

ggplot(plot_data, aes(x = Generation, y = predicted, fill = Generation)) +
  geom_col(width = 0.6, alpha = 0.9) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper),
                width = 0.2, linewidth = 1, color = "black") +
  geom_text(aes(label = round(predicted, 3)), vjust = -2.5, 
            size = 5, fontface = "bold", color = "black") +
  scale_fill_manual(values = c("1st generation" = "#E74C3C", 
                                "2nd generation" = "#3498DB")) +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.2),
                     labels = c("0\n(Not important)", "0.2", "0.4", "0.6", "0.8", 
                               "1.0\n(Extremely important)")) +
  labs(
    title = "Ethnic Identity Importance by Generation",
    subtitle = "Controlling for demographics, religion, and ethnicity (with 95% CI)",
    x = NULL,
    y = "Predicted Ethnic Identity Importance",
    caption = "Typical profile: Female, 35+, College degree, Non-religious, Chinese, Income <$100k\nBased on full OLS regression model with all controls\nError bars show 95% confidence intervals"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 11, hjust = 0.5, color = "gray30"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text = element_text(size = 11),
    axis.text.x = element_text(face = "bold", size = 12)
  )

ggsave("~/Desktop/Thesis/NAAS2016/Fresh Output/Generation_EthnicIdentity_Comparison.png",
       width = 8, height = 6, dpi = 300, bg = "white")

# ---- SUMMARY TABLE ----
cat("\n\n", strrep("=", 80), "\n")
cat("SUMMARY: GENERATIONAL DIFFERENCE IN ETHNIC IDENTITY IMPORTANCE\n")
cat(strrep("=", 80), "\n\n")

# Extract coefficients from all models
coef_summary <- bind_rows(
  tidy(m1, conf.int = TRUE) %>% mutate(Model = "Model 1: Bivariate"),
  tidy(m2, conf.int = TRUE) %>% mutate(Model = "Model 2: + Demographics"),
  tidy(m3, conf.int = TRUE) %>% mutate(Model = "Model 3: + Ethnicity")
) %>%
  filter(term == "generation2nd generation") %>%
  mutate(
    sig = case_when(
      p.value < 0.001 ~ "***",
      p.value < 0.01 ~ "**",
      p.value < 0.05 ~ "*",
      TRUE ~ ""
    )
  ) %>%
  select(Model, estimate, conf.low, conf.high, p.value, sig)

print(coef_summary)

cat("\n\nINTERPRETATION:\n")
cat("─────────────────────────────────────────────────────────────\n")
cat("• Positive coefficient = 2nd gen views ethnic identity as MORE important than 1st gen\n")
cat("• Negative coefficient = 2nd gen views ethnic identity as LESS important than 1st gen\n")
cat("• Ethnic identity scale: 0 (not at all important) to 1 (extremely important)\n")
cat("• Compare coefficients across models to see if effect persists\n\n")

cat("✓ Analysis complete!\n\n")
cat("Files saved:\n")
cat("  1. Generation_Effect_EthnicIdentity.html/.png - Regression table\n")
cat("  2. Generation_EthnicIdentity_Comparison.png - Bar chart with error bars\n\n")

cat("KEY FINDING:\n")
cat("─────────────────────────────────────────────────────────────\n")
cat(sprintf("2nd generation ethnic identity importance is %.3f points different from 1st generation\n", 
            coef_summary$estimate[3]))
cat(sprintf("(controlling for all demographics and ethnicity)\n"))
cat(sprintf("95%% CI: [%.3f, %.3f] %s\n", 
            coef_summary$conf.low[3], coef_summary$conf.high[3], coef_summary$sig[3]))
cat("\n")
```
Chapter 2: Predicting Democrat ID via linked fate
```{r}
# ============================================================
# DEMOCRATIC IDENTIFICATION: 2ND GENERATION EFFECT
# Focus: Does linked fate explain generational difference?
# Including interaction: Does linked fate work differently by generation?
# Using comprehensive linked fate index
# ============================================================

library(tidyverse)
library(broom)
library(stargazer)
library(webshot2)

# ---- PREPARE COMBINED DATA ----
partisan_linkedfate <- NAAS2016 %>%
  mutate(across(everything(), haven::as_factor)) %>%
  mutate(
    # Generation
    generation = case_when(
      S9 == "(2) Another country" ~ "1st generation",
      S9 == "(1) United States" & Q1_1 == "(2) No" & Q1_2 == "(2) No" ~ "2nd generation",
      TRUE ~ NA_character_
    ),
    
    # DEPENDENT VARIABLE: Democrat (vs non-Democrat)
    democrat = case_when(
      Q10_0A == "(1) Democrat" ~ 1,
      Q10_0A %in% c("(2) Republican", "(3) Independent", 
                    "(4) In terms of some other party",
                    "(5) Do not think in terms of political parties") ~ 0,
      TRUE ~ NA_real_
    ),
    
    # KEY INDEPENDENT VARIABLE: Linked Fate / Group Consciousness (COMPREHENSIVE INDEX)
    # Incorporates:
    #   Q4_3 + Q4_3A  : "Do you think what happens to other [RACES] in this country affects what happens in your life?"
    #   Q4_4 + Q4_4A  : "Do you think what happens generally to other [RETHNIC] Americans affects what happens in your life?"
    #   Q4_5A–Q4_5D   : "What, if anything, do [INSRACES] in the U.S. share with one another?" (race/culture/econ/political)
    
    # ---- 1) Linked fate: "other [RACES] affects your life" (Q4_3/Q4_3A) ----
    lf_races = case_when(
      Q4_3 == "(1) Yes" & Q4_3A == "(1) A lot" ~ 1,
      Q4_3 == "(1) Yes" & Q4_3A == "(2) Some" ~ 0.67,
      Q4_3 == "(1) Yes" & Q4_3A == "(3) Not very much" ~ 0.33,
      Q4_3 == "(2) No" ~ 0,
      TRUE ~ NA_real_
    ),
    
    # ---- 2) Linked fate: "other [RETHNIC] Americans affects your life" (Q4_4/Q4_4A) ----
    lf_rethnic = case_when(
      Q4_4 == "(1) Yes" & Q4_4A == "(1) A lot" ~ 1,
      Q4_4 == "(1) Yes" & Q4_4A == "(2) Some" ~ 0.67,
      Q4_4 == "(1) Yes" & Q4_4A == "(3) Not very much" ~ 0.33,
      Q4_4 == "(2) No" ~ 0,
      TRUE ~ NA_real_
    ),
    
    # ---- 3) Shared-group identity items (Q4_5A–Q4_5D), coded 0/1 ----
    share_race = case_when(
      Q4_5A %in% c("(1) Yes", "1") ~ 1,
      Q4_5A %in% c("(2) No",  "2") ~ 0,
      TRUE ~ NA_real_
    ),
    
    share_culture = case_when(
      Q4_5B %in% c("(1) Yes", "1") ~ 1,
      Q4_5B %in% c("(2) No",  "2") ~ 0,
      TRUE ~ NA_real_
    ),
    
    share_econ = case_when(
      Q4_5C %in% c("(1) Yes", "1") ~ 1,
      Q4_5C %in% c("(2) No",  "2") ~ 0,
      TRUE ~ NA_real_
    ),
    
    share_political = case_when(
      Q4_5D %in% c("(1) Yes", "1") ~ 1,
      Q4_5D %in% c("(2) No",  "2") ~ 0,
      TRUE ~ NA_real_
    ),
    
    # Average of shared-identity items (0–1)
    identity_solidarity = rowMeans(
      cbind(share_race, share_culture, share_econ, share_political),
      na.rm = TRUE
    ),
    
    # ---- 4) Final composite: Group Consciousness / Linked Fate Index ----
    # Equal-weight average of:
    #   (a) linked fate (RACES) + (b) linked fate (RETHNIC) + (c) identity solidarity
    group_consciousness = rowMeans(
      cbind(lf_races, lf_rethnic, identity_solidarity),
      na.rm = TRUE
    ),
    
    # Controls
    age_35plus = AGE2 == "(2) 35 or above",
    male = S7 == "(1) Male",
    college_plus = S8 %in% c("(5) College degree or Bachelor's degree", 
                             "(6) Graduate or Professional degree"),
    income_100k_plus = Q10_15 %in% c("(5) $100,000 to $125,000",
                                      "(6) $125,000 to $250,000",
                                      "(7) $250,000 and over"),
    
    religion_cat = case_when(
      Q10_21 %in% c("(03) Baptist", "(05) Catholic", "(06) Christian",
                    "(12) Episcopalian, Anglican", "(19) Lutheran", "(20) Methodist",
                    "(21) Mormon, Church of the Latter Day Saints", "(24) Pentecostal",
                    "(25) Presbyterian", "(26) Protestant (no denomination given)",
                    "(28) Seventh Day Adventist") ~ "Christian",
      Q10_21 %in% c("(04) Buddhist", "(15) Hindu", "(29) Sikh") ~ "Eastern Religion",
      Q10_21 %in% c("(01) Agnostic", "(02) Atheist", "(77) No religion",
                    "(78) Spiritual, but not religious") ~ "Non-religious",
      TRUE ~ "Other"
    ),
    
    ethnicity_major = case_when(
      RETHNICS == "(03) a Chinese" ~ "Chinese",
      RETHNICS == "(04) a Filipino" ~ "Filipino",
      RETHNICS == "(06) an Indian" ~ "Indian",
      RETHNICS == "(07) a Japanese" ~ "Japanese",
      RETHNICS == "(08) a Korean" ~ "Korean",
      RETHNICS == "(11) a Vietnamese" ~ "Vietnamese",
      TRUE ~ "Other"
    )
  ) %>%
  filter(generation %in% c("1st generation", "2nd generation")) %>%
  mutate(
    generation = relevel(factor(generation), ref = "1st generation"),  # 1st gen as reference
    religion_cat = relevel(factor(religion_cat), ref = "Christian"),
    ethnicity_major = factor(ethnicity_major)
  )

# ---- CHECK SAMPLE SIZES ----
cat("\n", strrep("=", 80), "\n")
cat("SAMPLE SIZES\n")
cat(strrep("=", 80), "\n\n")

partisan_linkedfate %>%
  group_by(generation) %>%
  summarise(
    N_total = n(),
    N_democrat = sum(democrat == 1, na.rm = TRUE),
    N_non_democrat = sum(democrat == 0, na.rm = TRUE),
    Pct_democrat = round(100 * mean(democrat, na.rm = TRUE), 1),
    N_linkedfate = sum(!is.na(group_consciousness)),
    Mean_linkedfate = round(mean(group_consciousness, na.rm = TRUE), 3),
    SD_linkedfate = round(sd(group_consciousness, na.rm = TRUE), 3),
    N_complete = sum(!is.na(group_consciousness) & !is.na(democrat)),
    .groups = "drop"
  ) %>% print()

# ---- POOLED MODELS (2ND GEN VS 1ST GEN) ----
cat("\n", strrep("=", 80), "\n")
cat("POOLED MODELS: 2ND GENERATION EFFECT\n")
cat(strrep("=", 80), "\n\n")

# Model 1: Generation only (bivariate)
m1_gen <- glm(democrat ~ generation, data = partisan_linkedfate, family = binomial)

# Model 2: Generation + Demographics ONLY (no linked fate)
m2_gen_demog <- glm(democrat ~ generation + age_35plus + male + 
                      college_plus + income_100k_plus + religion_cat + ethnicity_major,
                    data = partisan_linkedfate, family = binomial)

# Model 3: Generation + Demographics + Linked Fate (main effects)
m3_gen_linkedfate <- glm(democrat ~ generation + group_consciousness + age_35plus + male + 
                           college_plus + income_100k_plus + religion_cat + ethnicity_major,
                         data = partisan_linkedfate, family = binomial)

# Model 4: Generation × Linked Fate Interaction
m4_interaction <- glm(democrat ~ generation * group_consciousness + age_35plus + male + 
                        college_plus + income_100k_plus + religion_cat + ethnicity_major,
                      data = partisan_linkedfate, family = binomial)

# ---- CREATE OUTPUT DIRECTORY ----
dir.create("~/Desktop/Thesis/NAAS2016/Fresh Output", showWarnings = FALSE, recursive = TRUE)

# ---- STARGAZER TABLE: 2ND GENERATION EFFECT WITH INTERACTION ----
cat("\n=== TABLE: 2ND GENERATION EFFECT ON DEMOCRATIC ID WITH INTERACTION ===\n\n")

stargazer(m1_gen, m2_gen_demog, m3_gen_linkedfate, m4_interaction,
          type = "html",
          title = "2nd Generation Effect on Democratic ID: Role of Linked Fate with Interaction",
          dep.var.labels = "Democratic Identification",
          covariate.labels = c(
            "2nd Generation (vs 1st)",
            "Linked Fate Index",
            "2nd Gen × Linked Fate"
          ),
          column.labels = c("(1)<br>Bivariate", 
                           "(2)<br>+ Demographics",
                           "(3)<br>+ Linked Fate",
                           "(4)<br>+ Interaction"),
          model.numbers = FALSE,
          keep = c("generation", "group_consciousness", ":"),  # SHOW GENERATION, LINKED FATE, AND INTERACTION
          ci = TRUE,
          ci.level = 0.95,
          ci.separator = ", ",
          star.cutoffs = c(0.05, 0.01, 0.001),
          star.char = c("*", "**", "***"),
          add.lines = list(
            c("Demographics", "No", "Yes", "Yes", "Yes"),
            c("Religion", "No", "Yes", "Yes", "Yes"),
            c("Ethnicity", "No", "Yes", "Yes", "Yes")
          ),
          notes = c("<i>Notes:</i> Logistic regression coefficients with 95% confidence intervals.",
                    "Dependent variable: Democrat = 1, Non-Democrat (Republican/Independent/Other/None) = 0.",
                    "Linked Fate Index = average of: (a) linked fate with other [RACES], (b) linked fate with [RETHNIC] Americans,",
                    "(c) shared identity (race, culture, economic, political interests). Scale: 0 (no linked fate) to 1 (strong linked fate).",
                    "Model 1: Raw generational difference. Model 2: + Demographics. Model 3: + Linked fate main effect.",
                    "Model 4: Tests if linked fate effect differs by generation (interaction).",
                    "Interaction term: Positive = Linked fate has STRONGER effect for 2nd gen than 1st gen.",
                    "Demographics = age, gender, education, income (controlled but not shown).",
                    "*p<0.05; **p<0.01; ***p<0.001"),
          notes.align = "l",
          notes.append = FALSE,
          omit.stat = c("aic", "ll"),
          digits = 3,
          font.size = "small",
          out = "~/Desktop/Thesis/NAAS2016/Fresh Output/Table_2ndGen_Democrat_LinkedFate_Interaction.html")

webshot("~/Desktop/Thesis/NAAS2016/Fresh Output/Table_2ndGen_Democrat_LinkedFate_Interaction.html",
        "~/Desktop/Thesis/NAAS2016/Fresh Output/Table_2ndGen_Democrat_LinkedFate_Interaction.png",
        vwidth = 1200, vheight = 800, zoom = 2)

# Text version
stargazer(m1_gen, m2_gen_demog, m3_gen_linkedfate, m4_interaction,
          type = "text",
          title = "2nd Generation Effect on Democratic ID with Interaction",
          column.labels = c("Bivariate", "+Demog", "+LinkedFate", "+Interaction"),
          model.numbers = FALSE,
          keep = c("generation", "group_consciousness", ":"),
          ci = TRUE,
          star.cutoffs = c(0.05, 0.01, 0.001),
          add.lines = list(
            c("Demographics", "No", "Yes", "Yes", "Yes"),
            c("Religion", "No", "Yes", "Yes", "Yes"),
            c("Ethnicity", "No", "Yes", "Yes", "Yes")
          ),
          omit.stat = c("aic", "ll"),
          digits = 3)

# ---- INTERPRETATION OF INTERACTION ----
cat("\n\n", strrep("=", 80), "\n")
cat("INTERACTION INTERPRETATION\n")
cat(strrep("=", 80), "\n\n")

# Extract interaction coefficient
interaction_coef <- tidy(m4_interaction, conf.int = TRUE) %>%
  filter(grepl(":", term)) %>%
  select(term, estimate, conf.low, conf.high, p.value)

print(interaction_coef)

cat("\n\nHOW TO INTERPRET THE INTERACTION:\n")
cat("─────────────────────────────────────────────────────────────\n")
cat("Model 3 assumes linked fate has the SAME effect for 1st and 2nd gen.\n")
cat("Model 4 allows the effect to DIFFER by generation.\n\n")

cat("In Model 4:\n")
cat("• 'Linked Fate Index' coefficient = Effect of linked fate for 1st gen (reference)\n")
cat("• '2nd Gen × Linked Fate' = How much MORE/LESS linked fate matters for 2nd gen\n\n")

cat("If interaction is:\n")
cat("  • POSITIVE & significant → Linked fate matters MORE for 2nd gen\n")
cat("  • NEGATIVE & significant → Linked fate matters LESS for 2nd gen\n")
cat("  • Not significant → Linked fate works the SAME for both generations\n\n")

cat("Effect of Linked Fate by Generation:\n")
cat("  • For 1st gen: β(Linked Fate)\n")
cat("  • For 2nd gen: β(Linked Fate) + β(2nd Gen × Linked Fate)\n\n")

# ---- CALCULATE EFFECTS BY GENERATION ----
cat("\n\n", strrep("=", 80), "\n")
cat("LINKED FATE EFFECT BY GENERATION\n")
cat(strrep("=", 80), "\n\n")

# Extract coefficients from interaction model
linkedfate_coef <- coef(m4_interaction)["group_consciousness"]
interaction_coef_val <- coef(m4_interaction)["generation2nd generation:group_consciousness"]

cat("Effect of Linked Fate Index on Democratic ID:\n")
cat(sprintf("  1st Generation: %.3f\n", linkedfate_coef))
cat(sprintf("  2nd Generation: %.3f (= %.3f + %.3f)\n", 
            linkedfate_coef + interaction_coef_val, 
            linkedfate_coef, 
            interaction_coef_val))

if (!is.na(interaction_coef_val)) {
  if (interaction_coef_val > 0) {
    cat("\n→ Linked fate has a STRONGER positive effect for 2nd generation\n")
  } else {
    cat("\n→ Linked fate has a WEAKER effect for 2nd generation\n")
  }
}

# ---- SUMMARY ----
cat("\n\n", strrep("=", 80), "\n")
cat("ANALYSIS COMPLETE\n")
cat(strrep("=", 80), "\n\n")

cat("LINKED FATE INDEX COMPONENTS:\n")
cat("─────────────────────────────────────────────────────────────\n")
cat("1. Linked fate with other [RACES] (Q4_3/Q4_3A)\n")
cat("2. Linked fate with [RETHNIC] Americans (Q4_4/Q4_4A)\n")
cat("3. Shared identity solidarity (Q4_5A-Q4_5D):\n")
cat("   - Shared race/ethnicity\n")
cat("   - Shared culture/language\n")
cat("   - Shared economic interests\n")
cat("   - Shared political interests\n\n")
cat("Final index = equal-weight average of all components (0-1 scale)\n\n")

cat("RESEARCH QUESTIONS:\n")
cat("1. Does the 2nd generation effect on Democratic ID persist after controlling for linked fate?\n")
cat("2. Does linked fate work DIFFERENTLY for 1st vs 2nd generation?\n\n")

cat("MODEL PROGRESSION:\n")
cat("─────────────────────────────────────────────────────────────\n")
cat("Model 1 (Bivariate): Raw generational difference\n")
cat("Model 2 (+ Demographics): Generation effect after demographic controls\n")
cat("Model 3 (+ Linked Fate): Does linked fate explain the generational gap?\n")
cat("Model 4 (+ Interaction): Does linked fate work differently by generation?\n\n")

cat("KEY COMPARISON:\n")
cat("• Compare Model 2 vs Model 3: Does adding linked fate change 2nd gen coefficient?\n")
cat("• Check Model 4 interaction: Is the effect of linked fate different for 2nd gen?\n\n")

cat("✓ Analysis complete!\n\n")
cat("FILE SAVED:\n")
cat("  - Table_2ndGen_Democrat_LinkedFate_Interaction.html/.png\n\n")
```
CHAPTER FIVE: INTERACTION MODEL (no more three-way)
```{r}
# ============================================================
# DEMOCRATIC IDENTIFICATION: TWO-WAY INTERACTION MODEL
# Generation × Moral Conservatism × Linked Fate
# ============================================================

library(tidyverse)
library(broom)
library(stargazer)
library(webshot2)

# ---- PREPARE COMBINED DATA ----
dem_interactions <- NAAS2016 %>%
  mutate(across(everything(), haven::as_factor)) %>%
  mutate(
    # Generation
    generation = case_when(
      S9 == "(2) Another country" ~ "1st generation",
      S9 == "(1) United States" & Q1_1 == "(2) No" & Q1_2 == "(2) No" ~ "2nd generation",
      TRUE ~ NA_character_
    ),
    
    # DEPENDENT VARIABLE: Democrat (vs non-Democrat)
    democrat = case_when(
      Q10_0A == "(1) Democrat" ~ 1,
      Q10_0A %in% c("(2) Republican", "(3) Independent", 
                    "(4) In terms of some other party",
                    "(5) Do not think in terms of political parties") ~ 0,
      TRUE ~ NA_real_
    ),
    
    # KEY INDEPENDENT VARIABLE 1: Moral Conservatism (REVERSE of LGBTQ Support)
    lgbtq_support = rowMeans(
      cbind(
        case_when(Q8_1A %in% c("(4) Favor", "(5) Strongly favor") ~ 1,
                  Q8_1A %in% c("(1) Strongly oppose", "(2) Oppose") ~ 0, TRUE ~ NA_real_),
        case_when(Q8_1B %in% c("(4) Favor", "(5) Strongly favor") ~ 1,
                  Q8_1B %in% c("(1) Strongly oppose", "(2) Oppose") ~ 0, TRUE ~ NA_real_),
        case_when(Q8_1C %in% c("(4) Favor", "(5) Strongly favor") ~ 1,
                  Q8_1C %in% c("(1) Strongly oppose", "(2) Oppose") ~ 0, TRUE ~ NA_real_)
      ), na.rm = TRUE
    ),
    
    # Reverse code: higher = more morally conservative
    moral_conservatism = 1 - lgbtq_support,
    
    # KEY INDEPENDENT VARIABLE 2: Linked Fate / Group Consciousness (COMPREHENSIVE INDEX)
    
    # ---- Linked fate: "other [RACES] affects your life" (Q4_3/Q4_3A) ----
    lf_races = case_when(
      Q4_3 == "(1) Yes" & Q4_3A == "(1) A lot" ~ 1,
      Q4_3 == "(1) Yes" & Q4_3A == "(2) Some" ~ 0.67,
      Q4_3 == "(1) Yes" & Q4_3A == "(3) Not very much" ~ 0.33,
      Q4_3 == "(2) No" ~ 0,
      TRUE ~ NA_real_
    ),
    
    # ---- Linked fate: "other [RETHNIC] Americans affects your life" (Q4_4/Q4_4A) ----
    lf_rethnic = case_when(
      Q4_4 == "(1) Yes" & Q4_4A == "(1) A lot" ~ 1,
      Q4_4 == "(1) Yes" & Q4_4A == "(2) Some" ~ 0.67,
      Q4_4 == "(1) Yes" & Q4_4A == "(3) Not very much" ~ 0.33,
      Q4_4 == "(2) No" ~ 0,
      TRUE ~ NA_real_
    ),
    
    # ---- Shared-group identity items (Q4_5A–Q4_5D) ----
    share_race = case_when(
      Q4_5A %in% c("(1) Yes", "1") ~ 1,
      Q4_5A %in% c("(2) No",  "2") ~ 0,
      TRUE ~ NA_real_
    ),
    
    share_culture = case_when(
      Q4_5B %in% c("(1) Yes", "1") ~ 1,
      Q4_5B %in% c("(2) No",  "2") ~ 0,
      TRUE ~ NA_real_
    ),
    
    share_econ = case_when(
      Q4_5C %in% c("(1) Yes", "1") ~ 1,
      Q4_5C %in% c("(2) No",  "2") ~ 0,
      TRUE ~ NA_real_
    ),
    
    share_political = case_when(
      Q4_5D %in% c("(1) Yes", "1") ~ 1,
      Q4_5D %in% c("(2) No",  "2") ~ 0,
      TRUE ~ NA_real_
    ),
    
    # Average of shared-identity items
    identity_solidarity = rowMeans(
      cbind(share_race, share_culture, share_econ, share_political),
      na.rm = TRUE
    ),
    
    # ---- Final composite: Linked Fate Index ----
    linked_fate = rowMeans(
      cbind(lf_races, lf_rethnic, identity_solidarity),
      na.rm = TRUE
    ),
    
    # Controls
    age_35plus = AGE2 == "(2) 35 or above",
    male = S7 == "(1) Male",
    college_plus = S8 %in% c("(5) College degree or Bachelor's degree", 
                             "(6) Graduate or Professional degree"),
    income_100k_plus = Q10_15 %in% c("(5) $100,000 to $125,000",
                                      "(6) $125,000 to $250,000",
                                      "(7) $250,000 and over"),
    
    religion_cat = case_when(
      Q10_21 %in% c("(03) Baptist", "(05) Catholic", "(06) Christian",
                    "(12) Episcopalian, Anglican", "(19) Lutheran", "(20) Methodist",
                    "(21) Mormon, Church of the Latter Day Saints", "(24) Pentecostal",
                    "(25) Presbyterian", "(26) Protestant (no denomination given)",
                    "(28) Seventh Day Adventist") ~ "Christian",
      Q10_21 %in% c("(04) Buddhist", "(15) Hindu", "(29) Sikh") ~ "Eastern Religion",
      Q10_21 %in% c("(01) Agnostic", "(02) Atheist", "(77) No religion",
                    "(78) Spiritual, but not religious") ~ "Non-religious",
      TRUE ~ "Other"
    ),
    
    ethnicity_major = case_when(
      RETHNICS == "(03) a Chinese" ~ "Chinese",
      RETHNICS == "(04) a Filipino" ~ "Filipino",
      RETHNICS == "(06) an Indian" ~ "Indian",
      RETHNICS == "(07) a Japanese" ~ "Japanese",
      RETHNICS == "(08) a Korean" ~ "Korean",
      RETHNICS == "(11) a Vietnamese" ~ "Vietnamese",
      TRUE ~ "Other"
    )
  ) %>%
  filter(generation %in% c("1st generation", "2nd generation")) %>%
  mutate(
    generation     = relevel(factor(generation),     ref = "1st generation"),
    religion_cat   = relevel(factor(religion_cat),   ref = "Christian"),
    ethnicity_major = factor(ethnicity_major)
  )

# ---- CHECK SAMPLE SIZES ----
cat("\n", strrep("=", 80), "\n")
cat("SAMPLE SIZES AND DESCRIPTIVE STATISTICS\n")
cat(strrep("=", 80), "\n\n")

dem_interactions %>%
  group_by(generation) %>%
  summarise(
    N_total        = n(),
    N_complete     = sum(!is.na(democrat) & !is.na(moral_conservatism) & !is.na(linked_fate)),
    Mean_moral_conserv = round(mean(moral_conservatism, na.rm = TRUE), 3),
    Mean_linked_fate   = round(mean(linked_fate,         na.rm = TRUE), 3),
    Pct_democrat       = round(100 * mean(democrat,      na.rm = TRUE), 1),
    .groups = "drop"
  ) %>% print()

# ---- BUILD MODELS PROGRESSIVELY ----
cat("\n", strrep("=", 80), "\n")
cat("BUILDING INTERACTION MODELS\n")
cat(strrep("=", 80), "\n\n")

# Model 1: Main effects only
m1_main <- glm(democrat ~ generation + moral_conservatism + linked_fate +
                 age_35plus + male + college_plus + income_100k_plus +
                 religion_cat + ethnicity_major,
               data = dem_interactions, family = binomial)

# Model 2: + Two-way interactions
m2_twoway <- glm(democrat ~ generation * moral_conservatism +
                   generation * linked_fate +
                   moral_conservatism * linked_fate +
                   age_35plus + male + college_plus + income_100k_plus +
                   religion_cat + ethnicity_major,
                 data = dem_interactions, family = binomial)

# ---- CREATE OUTPUT DIRECTORY ----
dir.create("~/Desktop/Thesis/NAAS2016/Fresh Output",
           showWarnings = FALSE, recursive = TRUE)

# ---- STARGAZER TABLE: INTERACTION MODELS ----
cat("\n=== TABLE: TWO-WAY INTERACTION MODEL ===\n\n")

stargazer(m1_main, m2_twoway,
          type = "html",
          title = "Democratic ID: Generation × Moral Conservatism × Linked Fate Interactions",
          dep.var.labels = "Democratic Identification",
          covariate.labels = c(
            "2nd Generation (vs 1st)",
            "Moral Conservatism",
            "Linked Fate",
            "2nd Gen × Moral Conserv",
            "2nd Gen × Linked Fate",
            "Moral Conserv × Linked Fate"
          ),
          column.labels = c("(1)<br>Main Effects",
                            "(2)<br>Two-Way<br>Interactions"),
          model.numbers = FALSE,
          keep = c("generation", "moral_conservatism", "linked_fate", ":"),
          ci = TRUE,
          ci.level = 0.95,
          ci.separator = ", ",
          star.cutoffs = c(0.05, 0.01, 0.001),
          star.char = c("*", "**", "***"),
          add.lines = list(
            c("Demographics", "Yes", "Yes"),
            c("Religion",     "Yes", "Yes"),
            c("Ethnicity",    "Yes", "Yes")
          ),
          notes = c(
            "<i>Notes:</i> Logistic regression coefficients with 95% confidence intervals.",
            "Dependent variable: Democrat = 1, Non-Democrat (Republican/Independent/Other/None) = 0.",
            "Moral Conservatism = 1 - LGBTQ Support (higher = more conservative on LGBTQ issues).",
            "Linked Fate = comprehensive index (0-1): linked fate with races/ethnicity + shared identity.",
            "Model 1: Main effects only. Model 2: All two-way interactions.",
            "Controls: age, gender, education, income, religion, ethnicity (included but not shown).",
            "*p<0.05; **p<0.01; ***p<0.001"
          ),
          notes.align  = "l",
          notes.append = FALSE,
          omit.stat = c("aic", "ll"),
          digits    = 3,
          font.size = "small",
          out = "~/Desktop/Thesis/NAAS2016/Fresh Output/Table_Democrat_TwoWay_Interaction.html")

webshot("~/Desktop/Thesis/NAAS2016/Fresh Output/Table_Democrat_TwoWay_Interaction.html",
        "~/Desktop/Thesis/NAAS2016/Fresh Output/Table_Democrat_TwoWay_Interaction.png",
        vwidth = 1300, vheight = 900, zoom = 2)

# Text version
stargazer(m1_main, m2_twoway,
          type = "text",
          title = "Two-Way Interaction Model",
          column.labels = c("Main", "2-Way"),
          model.numbers = FALSE,
          keep = c("generation", "moral_conservatism", "linked_fate", ":"),
          ci = TRUE,
          star.cutoffs = c(0.05, 0.01, 0.001),
          add.lines = list(
            c("Demographics", "Yes", "Yes"),
            c("Religion",     "Yes", "Yes"),
            c("Ethnicity",    "Yes", "Yes")
          ),
          omit.stat = c("aic", "ll"),
          digits = 3)

# ---- INTERPRETATION GUIDE ----
cat("\n\n", strrep("=", 80), "\n")
cat("INTERPRETATION GUIDE: TWO-WAY INTERACTIONS\n")
cat(strrep("=", 80), "\n\n")

cat("MODEL COMPONENTS:\n")
cat("─────────────────────────────────────────────────────────────\n")
cat("Model 2 includes:\n")
cat("  β0: Intercept\n")
cat("  β1: 2nd Generation main effect\n")
cat("  β2: Moral Conservatism main effect\n")
cat("  β3: Linked Fate main effect\n")
cat("  β4: 2nd Gen × Moral Conservatism\n")
cat("  β5: 2nd Gen × Linked Fate\n")
cat("  β6: Moral Conservatism × Linked Fate\n")
cat("  + All demographic controls\n\n")

cat("HOW TO INTERPRET:\n")
cat("─────────────────────────────────────────────────────────────\n")
cat("Main effects (β1-β3): Effects when other variables = 0\n\n")

cat("Two-way interactions:\n")
cat("  • β4: Does moral conservatism work differently for 2nd gen?\n")
cat("  • β5: Does linked fate work differently for 2nd gen?\n")
cat("  • β6: Does the effect of moral conservatism depend on linked fate?\n\n")

# ---- MODEL COMPARISON ----
cat("\n\n", strrep("=", 80), "\n")
cat("MODEL COMPARISON\n")
cat(strrep("=", 80), "\n\n")

lr_test_2way <- anova(m1_main, m2_twoway, test = "LRT")

cat("Likelihood Ratio Test:\n")
cat("─────────────────────────────────────────────────────────────\n")
cat("Model 1 vs Model 2 (adding two-way interactions):\n")
print(lr_test_2way)
cat("\n")

if (lr_test_2way$`Pr(>Chi)`[2] < 0.05) {
  cat("→ Two-way interactions significantly improve model fit (p < 0.05)\n\n")
} else {
  cat("→ Two-way interactions do NOT significantly improve model fit\n")
  cat("  Main effects model may be sufficient\n\n")
}

# ---- SUMMARY ----
cat("\n\n", strrep("=", 80), "\n")
cat("ANALYSIS COMPLETE\n")
cat(strrep("=", 80), "\n\n")

cat("RESEARCH QUESTION:\n")
cat("Does the relationship between moral conservatism and Democratic identification\n")
cat("depend on generation status and/or linked fate?\n\n")

cat("KEY VARIABLES:\n")
cat("─────────────────────────────────────────────────────────────\n")
cat("• Generation: 1st gen (foreign-born) vs 2nd gen (US-born, both parents FB)\n")
cat("• Moral Conservatism: 0 (progressive on LGBTQ) to 1 (conservative on LGBTQ)\n")
cat("• Linked Fate: 0 (no linked fate) to 1 (strong linked fate/group consciousness)\n\n")

cat("✓ Analysis complete!\n\n")
cat("FILE SAVED:\n")
cat("  - Table_Democrat_TwoWay_Interaction.html/.png\n\n")
```
Chapter 5: Comparing number of 2nd gens who are morally conservative:
```{r}
# ============================================================
# DISTRIBUTION ANALYSIS: MORAL CONSERVATISM vs LINKED FATE
# Among Second-Generation Asian Americans
# ============================================================

# ---- FILTER TO 2ND GENERATION ----
second_gen <- dem_interactions %>%
  filter(generation == "2nd generation") %>%
  filter(!is.na(moral_conservatism), !is.na(linked_fate))

cat("\n", strrep("=", 80), "\n")
cat("2ND GENERATION: SAMPLE SIZE\n")
cat(strrep("=", 80), "\n\n")
cat(sprintf("N (complete cases on both key vars): %d\n\n", nrow(second_gen)))

# ============================================================
# SECTION 1: CONTINUOUS DISTRIBUTIONS
# ============================================================

cat(strrep("=", 80), "\n")
cat("SECTION 1: CONTINUOUS DISTRIBUTIONS\n")
cat(strrep("=", 80), "\n\n")

# Summary statistics side by side
cat("── Summary Statistics ──────────────────────────────────────\n\n")

summary_stats <- second_gen %>%
  summarise(
    across(
      c(moral_conservatism, linked_fate),
      list(
        Mean   = ~round(mean(.,  na.rm = TRUE), 3),
        Median = ~round(median(., na.rm = TRUE), 3),
        SD     = ~round(sd(.,   na.rm = TRUE), 3),
        Min    = ~round(min(.,  na.rm = TRUE), 3),
        Max    = ~round(max(.,  na.rm = TRUE), 3)
      ),
      .names = "{.col}__{.fn}"
    )
  ) %>%
  pivot_longer(everything(),
               names_to  = c("Variable", "Stat"),
               names_sep = "__") %>%
  pivot_wider(names_from = Stat, values_from = value)

print(summary_stats)

# ---- HISTOGRAM: MORAL CONSERVATISM ----
cat("\n── Histogram: Moral Conservatism (0 = progressive, 1 = conservative) ──\n\n")

mc_hist <- second_gen %>%
  mutate(bin = cut(moral_conservatism,
                   breaks = seq(0, 1, by = 0.1),
                   include.lowest = TRUE)) %>%
  count(bin) %>%
  mutate(pct = round(100 * n / sum(n), 1))

print(mc_hist)

# ---- HISTOGRAM: LINKED FATE ----
cat("\n── Histogram: Linked Fate (0 = none, 1 = strong) ──\n\n")

lf_hist <- second_gen %>%
  mutate(bin = cut(linked_fate,
                   breaks = seq(0, 1, by = 0.1),
                   include.lowest = TRUE)) %>%
  count(bin) %>%
  mutate(pct = round(100 * n / sum(n), 1))

print(lf_hist)

# ============================================================
# SECTION 2: THRESHOLD-BASED PATHWAY COUNTS
# ============================================================

cat("\n\n", strrep("=", 80), "\n")
cat("SECTION 2: PATHWAY PREVALENCE (THRESHOLD-BASED)\n")
cat(strrep("=", 80), "\n\n")

# Three thresholds to avoid arbitrary single cut
thresholds <- c(0.50, 0.60, 0.67)

cat("── How Many Fall Above Each Threshold? ────────────────────\n\n")
cat(sprintf("%-12s  %-30s  %-30s\n",
            "Threshold", "High Moral Conservatism", "High Linked Fate"))
cat(strrep("-", 76), "\n")

for (t in thresholds) {
  n_mc  <- sum(second_gen$moral_conservatism >= t, na.rm = TRUE)
  n_lf  <- sum(second_gen$linked_fate        >= t, na.rm = TRUE)
  pct_mc <- round(100 * n_mc / nrow(second_gen), 1)
  pct_lf <- round(100 * n_lf / nrow(second_gen), 1)
  cat(sprintf("≥ %-9.2f  %d (%.1f%%)%22s  %d (%.1f%%)\n",
              t, n_mc, pct_mc, "", n_lf, pct_lf))
}

# ============================================================
# SECTION 3: FOUR-CELL PATHWAY CROSSTAB (at 0.50 threshold)
# ============================================================

cat("\n\n", strrep("=", 80), "\n")
cat("SECTION 3: FOUR-CELL PATHWAY CROSSTAB (threshold = 0.50)\n")
cat(strrep("=", 80), "\n\n")

pathway_data <- second_gen %>%
  mutate(
    high_mc = moral_conservatism >= 0.50,
    high_lf = linked_fate        >= 0.50,
    pathway = case_when(
      high_mc & high_lf  ~ "Both pathways",
      high_mc & !high_lf ~ "Moral conservatism only",
      !high_mc & high_lf ~ "Linked fate only",
      TRUE               ~ "Neither"
    )
  )

pathway_summary <- pathway_data %>%
  group_by(pathway) %>%
  summarise(
    N           = n(),
    Pct         = round(100 * n() / nrow(pathway_data), 1),
    Pct_Dem     = round(100 * mean(democrat, na.rm = TRUE), 1),
    .groups = "drop"
  ) %>%
  arrange(desc(N))

cat("Pathway breakdown (with % identifying as Democrat):\n\n")
print(pathway_summary)

# Raw crosstab
cat("\n── Raw Crosstab: High Moral Conservatism × High Linked Fate ──\n\n")
crosstab <- pathway_data %>%
  count(high_mc, high_lf) %>%
  mutate(
    pct   = round(100 * n / sum(n), 1),
    label = sprintf("High MC = %-5s | High LF = %-5s",
                    high_mc, high_lf)
  ) %>%
  select(label, n, pct)

print(crosstab)

# ============================================================
# SECTION 4: TERTILE BREAKDOWN (low / medium / high)
# ============================================================

cat("\n\n", strrep("=", 80), "\n")
cat("SECTION 4: TERTILE BREAKDOWN\n")
cat(strrep("=", 80), "\n\n")

tertile_data <- second_gen %>%
  mutate(
    mc_tertile = ntile(moral_conservatism, 3),
    lf_tertile = ntile(linked_fate,        3),
    mc_group   = recode(mc_tertile, `1` = "Low MC", `2` = "Mid MC", `3` = "High MC"),
    lf_group   = recode(lf_tertile, `1` = "Low LF", `2` = "Mid LF", `3` = "High LF")
  )

cat("── Moral Conservatism Tertiles ─────────────────────────────\n\n")
tertile_data %>%
  group_by(mc_group) %>%
  summarise(
    N           = n(),
    Pct         = round(100 * n() / nrow(tertile_data), 1),
    Range_low   = round(min(moral_conservatism),  3),
    Range_high  = round(max(moral_conservatism),  3),
    Pct_Dem     = round(100 * mean(democrat, na.rm = TRUE), 1),
    .groups = "drop"
  ) %>% print()

cat("\n── Linked Fate Tertiles ─────────────────────────────────────\n\n")
tertile_data %>%
  group_by(lf_group) %>%
  summarise(
    N           = n(),
    Pct         = round(100 * n() / nrow(tertile_data), 1),
    Range_low   = round(min(linked_fate),  3),
    Range_high  = round(max(linked_fate),  3),
    Pct_Dem     = round(100 * mean(democrat, na.rm = TRUE), 1),
    .groups = "drop"
  ) %>% print()

# ============================================================
# SECTION 5: CALLOUT SUMMARY
# ============================================================

cat("\n\n", strrep("=", 80), "\n")
cat("SECTION 5: CALLOUT SUMMARY — WHICH PATHWAY IS MORE COMMON?\n")
cat(strrep("=", 80), "\n\n")

n_total <- nrow(second_gen)
n_mc_50 <- sum(second_gen$moral_conservatism >= 0.50, na.rm = TRUE)
n_lf_50 <- sum(second_gen$linked_fate        >= 0.50, na.rm = TRUE)

cat(sprintf("Among %d second-generation respondents with complete data:\n\n", n_total))
cat(sprintf("  • High moral conservatism (≥ 0.50):  %d (%.1f%%)\n",
            n_mc_50, 100 * n_mc_50 / n_total))
cat(sprintf("  • High linked fate (≥ 0.50):          %d (%.1f%%)\n\n",
            n_lf_50, 100 * n_lf_50 / n_total))

dominant <- if (n_lf_50 > n_mc_50) "linked fate" else "moral conservatism"
minority  <- if (n_lf_50 > n_mc_50) "moral conservatism" else "linked fate"

cat(sprintf(
  "→ The MORE common pathway is HIGH %s.\n",
  toupper(dominant)
))
cat(sprintf(
  "  High %s is less common among 2nd-generation respondents,\n",
  minority
))
cat(sprintf(
  "  suggesting that while moral conservatism may suppress\n"
))
cat(sprintf(
  "  Democratic ID, it affects a smaller share of this group\n"
))
cat(sprintf(
  "  than does the pro-Democrat pull of linked fate.\n\n"
))

cat("  Substantive implication:\n")
cat("  If most 2nd-gen respondents score HIGH on linked fate\n")
cat("  but only a minority score HIGH on moral conservatism,\n")
cat("  the net partisan pull for this generation likely skews\n")
cat("  Democratic — consistent with the broad Asian American\n")
cat("  partisan alignment observed in 2016 NAAS data.\n\n")

cat("✓ Distribution analysis complete.\n\n")

# ============================================================
# SAVE DISTRIBUTION ANALYSIS OUTPUTS AS PNGs
# ============================================================

library(tidyverse)
library(scales)
library(gt)
library(webshot2)

output_dir <- "~/Desktop/Thesis/NAAS2016/Fresh Output"
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

N2 <- nrow(second_gen)  # convenience scalar

# ---- SHARED THEME ----
thesis_theme <- theme_minimal(base_size = 13) +
  theme(
    plot.title       = element_text(face = "bold", size = 14),
    plot.subtitle    = element_text(color = "grey40", size = 11),
    plot.caption     = element_text(color = "grey50", size = 9, hjust = 0),
    axis.title       = element_text(size = 11),
    panel.grid.minor = element_blank(),
    plot.margin      = margin(15, 15, 15, 15),
    plot.background  = element_rect(fill = "white", color = NA)
  )

pal <- c("Moral Conservatism" = "#C0392B", "Linked Fate" = "#2980B9")

# ============================================================
# PLOT 1: SIDE-BY-SIDE HISTOGRAMS
# ============================================================

p1_data <- second_gen %>%
  select(moral_conservatism, linked_fate) %>%
  pivot_longer(everything(),
               names_to  = "variable",
               values_to = "value") %>%
  mutate(variable = recode(variable,
                           "moral_conservatism" = "Moral Conservatism",
                           "linked_fate"        = "Linked Fate"))

p1 <- ggplot(p1_data, aes(x = value, fill = variable)) +
  geom_histogram(binwidth = 0.1, boundary = 0,
                 color = "white", alpha = 0.85) +
  facet_wrap(~variable, ncol = 2) +
  scale_fill_manual(values = pal) +
  scale_x_continuous(breaks = seq(0, 1, 0.25), limits = c(-0.05, 1.05)) +
  labs(
    title    = "Figure 1. Distributions of Moral Conservatism and Linked Fate",
    subtitle = "Second-Generation Asian Americans | NAAS 2016",
    x        = "Score (0 = low, 1 = high)",
    y        = "Count",
    caption  = paste0(
      "N = ", N2, ".  ",
      "Moral Conservatism = 1 − LGBTQ Support Index (higher = more conservative).\n",
      "Linked Fate = composite index of linked fate items + shared identity ",
      "(higher = stronger group consciousness)."
    )
  ) +
  thesis_theme +
  theme(legend.position = "none",
        strip.text = element_text(face = "bold", size = 12))

ggsave(file.path(output_dir, "Dist_01_Histograms.png"),
       p1, width = 10, height = 5, dpi = 300, bg = "white")
cat("✓ Saved: Dist_01_Histograms.png\n")

# ============================================================
# PLOT 2: THRESHOLD COMPARISON BAR CHART
# ============================================================

threshold_vals   <- c(0.50, 0.60, 0.67)
threshold_labels <- c("≥ 0.50", "≥ 0.60", "≥ 0.67")

p2_data <- tibble(
  threshold = factor(rep(threshold_labels, 2), levels = threshold_labels),
  variable  = rep(c("Moral Conservatism", "Linked Fate"), each = 3),
  n = c(
    sapply(threshold_vals, \(t) sum(second_gen$moral_conservatism >= t, na.rm = TRUE)),
    sapply(threshold_vals, \(t) sum(second_gen$linked_fate        >= t, na.rm = TRUE))
  )
) %>%
  mutate(pct = round(100 * n / N2, 1))

p2 <- ggplot(p2_data, aes(x = threshold, y = pct, fill = variable)) +
  geom_col(position = position_dodge(width = 0.65), width = 0.55) +
  geom_text(aes(label = sprintf("%.1f%%\n(n=%d)", pct, n)),
            position  = position_dodge(width = 0.65),
            vjust     = -0.3, size = 3.5, lineheight = 1.2) +
  scale_fill_manual(values = pal, name = NULL) +
  scale_y_continuous(limits = c(0, 105),
                     labels = percent_format(scale = 1)) +
  labs(
    title    = "Figure 2. Pathway Prevalence at Multiple Thresholds",
    subtitle = "% of 2nd-Generation Respondents Scoring 'High' on Each Variable",
    x        = "Threshold (score must meet or exceed)",
    y        = "% of Respondents",
    caption  = paste0(
      "N = ", N2, ".  ",
      "Three thresholds shown to avoid dependence on any single cut-point."
    )
  ) +
  thesis_theme +
  theme(legend.position = "top",
        legend.text = element_text(size = 11))

ggsave(file.path(output_dir, "Dist_02_Threshold_Comparison.png"),
       p2, width = 9, height = 6, dpi = 300, bg = "white")
cat("✓ Saved: Dist_02_Threshold_Comparison.png\n")

# ============================================================
# PLOT 3: FOUR-CELL HEATMAP
# ============================================================

p3_data <- second_gen %>%
  mutate(
    high_mc = moral_conservatism >= 0.50,
    high_lf = linked_fate        >= 0.50
  ) %>%
  group_by(high_mc, high_lf) %>%
  summarise(
    N       = n(),
    pct     = round(100 * n() / N2, 1),
    pct_dem = round(100 * mean(democrat, na.rm = TRUE), 1),
    .groups = "drop"
  ) %>%
  mutate(
    x_label = factor(
      ifelse(high_lf, "High\nLinked Fate", "Low\nLinked Fate"),
      levels = c("Low\nLinked Fate", "High\nLinked Fate")
    ),
    y_label = factor(
      ifelse(high_mc, "High\nMoral Conservatism", "Low\nMoral Conservatism"),
      levels = c("High\nMoral Conservatism", "Low\nMoral Conservatism")
    )
  )

p3 <- ggplot(p3_data, aes(x = x_label, y = y_label, fill = pct)) +
  geom_tile(color = "white", linewidth = 2) +
  geom_text(aes(label = sprintf("N = %d\n%.1f%% of 2nd gen\n%.1f%% Democrat",
                                N, pct, pct_dem)),
            size = 4.2, lineheight = 1.5, fontface = "bold") +
  scale_fill_gradient(low = "#D6EAF8", high = "#2471A3",
                      name = "% of\n2nd gen") +
  labs(
    title    = "Four-Cell Pathway Crosstab (Threshold = 0.50)",
    subtitle = "2nd-Generation Respondents: Moral Conservatism × Linked Fate",
    x        = NULL,
    y        = NULL,
    caption  = "Democrat % shown within each cell. Cut-point = 0.50 on 0–1 scale."
  ) +
  thesis_theme +
  theme(
    panel.grid      = element_blank(),
    axis.text       = element_text(size = 12, face = "bold"),
    legend.position = "right"
  )

ggsave(file.path(output_dir, "Dist_03_FourCell_Heatmap.png"),
       p3, width = 9, height = 6, dpi = 300, bg = "white")
cat("✓ Saved: Dist_03_FourCell_Heatmap.png\n")

# ============================================================
# PLOT 4: TERTILE DEMOCRAT % GROUPED BAR CHART
# ============================================================

make_tertile_summary <- function(df, var, var_label) {
  df %>%
    mutate(
      tert  = ntile({{ var }}, 3),
      label = recode(tert,
                     `1` = "Low\n(bottom 3rd)",
                     `2` = "Mid\n(middle 3rd)",
                     `3` = "High\n(top 3rd)"),
      var_name = var_label
    ) %>%
    group_by(var_name, label, tert) %>%
    summarise(pct_dem = 100 * mean(democrat, na.rm = TRUE),
              N = n(), .groups = "drop")
}

p4_data <- bind_rows(
  make_tertile_summary(second_gen, moral_conservatism, "Moral Conservatism"),
  make_tertile_summary(second_gen, linked_fate,        "Linked Fate")
) %>%
  mutate(
    var_name = factor(var_name, levels = c("Moral Conservatism", "Linked Fate")),
    label    = factor(label, levels = c("Low\n(bottom 3rd)",
                                        "Mid\n(middle 3rd)",
                                        "High\n(top 3rd)"))
  )

p4 <- ggplot(p4_data, aes(x = label, y = pct_dem, fill = var_name)) +
  geom_col(width = 0.6, show.legend = FALSE) +
  geom_text(aes(label = sprintf("%.1f%%\n(n=%d)", pct_dem, N)),
            vjust = -0.3, size = 3.8, lineheight = 1.2) +
  facet_wrap(~var_name, scales = "free_x") +
  scale_fill_manual(values = pal) +
  scale_y_continuous(limits = c(0, 105),
                     labels = percent_format(scale = 1)) +
  labs(
    title    = "Figure 4. % Identifying as Democrat, by Tertile",
    subtitle = "2nd-Generation Asian Americans | NAAS 2016",
    x        = NULL,
    y        = "% Identifying as Democrat",
    caption  = "Tertiles divide respondents into equal thirds by score on each variable."
  ) +
  thesis_theme +
  theme(strip.text = element_text(face = "bold", size = 12))

ggsave(file.path(output_dir, "Dist_04_Tertile_Democrat_Pct.png"),
       p4, width = 10, height = 6, dpi = 300, bg = "white")
cat("✓ Saved: Dist_04_Tertile_Democrat_Pct.png\n")

# ============================================================
# PLOT 5: SUMMARY TABLE (gt → PNG)
# ============================================================

summary_tbl_data <- tibble(
  Variable = c("Moral Conservatism", "Linked Fate"),
  Mean     = c(round(mean(second_gen$moral_conservatism, na.rm = TRUE), 3),
               round(mean(second_gen$linked_fate,        na.rm = TRUE), 3)),
  Median   = c(round(median(second_gen$moral_conservatism, na.rm = TRUE), 3),
               round(median(second_gen$linked_fate,        na.rm = TRUE), 3)),
  SD       = c(round(sd(second_gen$moral_conservatism, na.rm = TRUE), 3),
               round(sd(second_gen$linked_fate,        na.rm = TRUE), 3)),
  pct_50   = c(round(100 * mean(second_gen$moral_conservatism >= 0.50, na.rm = TRUE), 1),
               round(100 * mean(second_gen$linked_fate        >= 0.50, na.rm = TRUE), 1)),
  pct_60   = c(round(100 * mean(second_gen$moral_conservatism >= 0.60, na.rm = TRUE), 1),
               round(100 * mean(second_gen$linked_fate        >= 0.60, na.rm = TRUE), 1)),
  pct_67   = c(round(100 * mean(second_gen$moral_conservatism >= 0.67, na.rm = TRUE), 1),
               round(100 * mean(second_gen$linked_fate        >= 0.67, na.rm = TRUE), 1))
)

# ---- REPLACE the gt save block entirely ----
# (delete everything from gt_out <- ... down to the webshot2 line)

html_path <- file.path(output_dir, "Dist_05_Summary_Table.html")
png_path  <- file.path(output_dir, "Dist_05_Summary_Table.png")

# Build the HTML table as a plain string — no gt, no xfun
html_table <- paste0('
<!DOCTYPE html>
<html>
<head>
<style>
  body    { font-family: Georgia, serif; margin: 20px; background: white; }
  table   { border-collapse: collapse; width: 92%; margin: auto; font-size: 14px; }
  caption { font-size: 16px; font-weight: bold; color: white;
            background: #2C3E50; padding: 10px 14px; text-align: left; }
  .subtitle { font-size: 12px; font-weight: normal; color: #ccc; }
  .spanner  { background: #566573; color: white; font-weight: bold;
              text-align: center; padding: 7px 10px;
              border: 1px solid #aaa; }
  thead tr.colhead th { background: #566573; color: white; font-weight: bold;
                        padding: 8px 12px; text-align: center;
                        border: 1px solid #aaa; }
  tbody tr td   { padding: 8px 12px; text-align: center;
                  border: 1px solid #ddd; }
  .var-col      { text-align: left; font-weight: bold; }
  .mc-row       { background: #FADBD8; }
  .lf-row       { background: #D6EAF8; }
  tfoot td      { font-size: 11px; color: #555; padding: 6px 10px;
                  text-align: left; border-top: 1px solid #bbb; }
</style>
</head>
<body>
<table>
  <caption>
    Moral Conservatism vs. Linked Fate: Summary Statistics<br>
    <span class="subtitle">
      Second-Generation Asian Americans &nbsp;|&nbsp; NAAS 2016 &nbsp;|&nbsp; N = ',
      N2, '
    </span>
  </caption>
  <thead>
    <tr>
      <th class="spanner" rowspan="2" style="text-align:left;">Variable</th>
      <th class="spanner" colspan="3">Distribution</th>
      <th class="spanner" colspan="3">% Scoring &lsquo;High&rsquo; (above threshold)</th>
    </tr>
    <tr class="colhead">
      <th>Mean</th><th>Median</th><th>SD</th>
      <th>&ge; 0.50</th><th>&ge; 0.60</th><th>&ge; 0.67</th>
    </tr>
  </thead>
  <tbody>
    <tr class="mc-row">
      <td class="var-col">Moral Conservatism</td>
      <td>', summary_tbl_data$Mean[1],   '</td>
      <td>', summary_tbl_data$Median[1], '</td>
      <td>', summary_tbl_data$SD[1],     '</td>
      <td>', summary_tbl_data$pct_50[1], '%</td>
      <td>', summary_tbl_data$pct_60[1], '%</td>
      <td>', summary_tbl_data$pct_67[1], '%</td>
    </tr>
    <tr class="lf-row">
      <td class="var-col">Linked Fate</td>
      <td>', summary_tbl_data$Mean[2],   '</td>
      <td>', summary_tbl_data$Median[2], '</td>
      <td>', summary_tbl_data$SD[2],     '</td>
      <td>', summary_tbl_data$pct_50[2], '%</td>
      <td>', summary_tbl_data$pct_60[2], '%</td>
      <td>', summary_tbl_data$pct_67[2], '%</td>
    </tr>
  </tbody>
  <tfoot>
    <tr>
      <td colspan="7">
        <em>Note:</em> Moral Conservatism = 1 &minus; LGBTQ Support Index.
        Linked Fate = composite index of linked fate + shared identity items.
        Both variables scored 0&ndash;1.
      </td>
    </tr>
  </tfoot>
</table>
</body>
</html>')

writeLines(html_table, html_path)
webshot2::webshot(html_path, png_path, vwidth = 1100, vheight = 320, zoom = 2)
cat("✓ Saved: Dist_05_Summary_Table.png\n")

# ============================================================
# CONFIRMATION
# ============================================================

cat("\n", strrep("=", 60), "\n")
cat("ALL OUTPUTS SAVED TO:\n")
cat(output_dir, "\n")
cat(strrep("=", 60), "\n\n")
cat("  Dist_01_Histograms.png\n")
cat("  Dist_02_Threshold_Comparison.png\n")
cat("  Dist_03_FourCell_Heatmap.png\n")
cat("  Dist_04_Tertile_Democrat_Pct.png\n")
cat("  Dist_05_Summary_Table.png\n\n")
```


